From cb824349c7735bcbf5f3010c71f5d95894253712 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 20 Dec 2021 12:54:51 +0100 Subject: [PATCH 01/29] feat: add BLS12-378, a GT-strong SNARK-friendly inner curve --- ecc/bls12-377/bls12-377.go | 2 +- ecc/bls12-378/bls12-378.go | 126 + ecc/bls12-378/doc.go | 18 + ecc/bls12-378/fp/arith.go | 60 + ecc/bls12-378/fp/asm.go | 24 + ecc/bls12-378/fp/asm_noadx.go | 25 + ecc/bls12-378/fp/doc.go | 43 + ecc/bls12-378/fp/element.go | 1720 +++++++++++ ecc/bls12-378/fp/element_exp.go | 1040 +++++++ ecc/bls12-378/fp/element_fuzz.go | 152 + ecc/bls12-378/fp/element_mul_adx_amd64.s | 836 +++++ ecc/bls12-378/fp/element_mul_amd64.s | 858 ++++++ ecc/bls12-378/fp/element_ops_amd64.go | 50 + ecc/bls12-378/fp/element_ops_amd64.s | 452 +++ ecc/bls12-378/fp/element_ops_noasm.go | 78 + ecc/bls12-378/fp/element_test.go | 2681 +++++++++++++++++ ecc/bls12-378/fr/arith.go | 60 + ecc/bls12-378/fr/asm.go | 24 + ecc/bls12-378/fr/asm_noadx.go | 25 + ecc/bls12-378/fr/doc.go | 43 + ecc/bls12-378/fr/element.go | 1466 +++++++++ ecc/bls12-378/fr/element_exp.go | 642 ++++ ecc/bls12-378/fr/element_fuzz.go | 136 + ecc/bls12-378/fr/element_mul_adx_amd64.s | 466 +++ ecc/bls12-378/fr/element_mul_amd64.s | 488 +++ ecc/bls12-378/fr/element_ops_amd64.go | 50 + ecc/bls12-378/fr/element_ops_amd64.s | 340 +++ ecc/bls12-378/fr/element_ops_noasm.go | 78 + ecc/bls12-378/fr/element_test.go | 2649 ++++++++++++++++ ecc/bls12-378/fr/fft/doc.go | 18 + ecc/bls12-378/fr/fft/domain.go | 293 ++ ecc/bls12-378/fr/fft/domain_test.go | 47 + ecc/bls12-378/fr/fft/fft.go | 318 ++ ecc/bls12-378/fr/fft/fft_test.go | 413 +++ ecc/bls12-378/fr/fft/fuzz.go | 73 + ecc/bls12-378/fr/fft/fuzz_test.go | 56 + ecc/bls12-378/fr/kzg/doc.go | 18 + ecc/bls12-378/fr/kzg/fuzz.go | 84 + ecc/bls12-378/fr/kzg/fuzz_test.go | 56 + ecc/bls12-378/fr/kzg/kzg.go | 518 ++++ ecc/bls12-378/fr/kzg/kzg_test.go | 453 +++ ecc/bls12-378/fr/kzg/marshal.go | 138 + ecc/bls12-378/fr/mimc/doc.go | 18 + 
ecc/bls12-378/fr/mimc/fuzz.go | 34 + ecc/bls12-378/fr/mimc/mimc.go | 174 ++ ecc/bls12-378/fr/permutation/doc.go | 18 + ecc/bls12-378/fr/permutation/permutation.go | 361 +++ .../fr/permutation/permutation_test.go | 94 + ecc/bls12-378/fr/plookup/doc.go | 18 + ecc/bls12-378/fr/plookup/plookup_test.go | 139 + ecc/bls12-378/fr/plookup/table.go | 252 ++ ecc/bls12-378/fr/plookup/vector.go | 687 +++++ ecc/bls12-378/fr/polynomial/doc.go | 18 + ecc/bls12-378/fr/polynomial/polynomial.go | 123 + .../fr/polynomial/polynomial_test.go | 208 ++ ecc/bls12-378/fuzz.go | 76 + ecc/bls12-378/fuzz_test.go | 56 + ecc/bls12-378/g1.go | 964 ++++++ ecc/bls12-378/g1_test.go | 666 ++++ ecc/bls12-378/g2.go | 978 ++++++ ecc/bls12-378/g2_test.go | 685 +++++ ecc/bls12-378/hash_to_curve.go | 276 ++ ecc/bls12-378/internal/fptower/asm.go | 28 + ecc/bls12-378/internal/fptower/asm_noadx.go | 25 + ecc/bls12-378/internal/fptower/e12.go | 561 ++++ ecc/bls12-378/internal/fptower/e12_pairing.go | 128 + ecc/bls12-378/internal/fptower/e12_test.go | 492 +++ ecc/bls12-378/internal/fptower/e2.go | 262 ++ ecc/bls12-378/internal/fptower/e2_amd64.go | 45 + ecc/bls12-378/internal/fptower/e2_amd64.s | 320 ++ ecc/bls12-378/internal/fptower/e2_bls378.go | 104 + ecc/bls12-378/internal/fptower/e2_fallback.go | 40 + ecc/bls12-378/internal/fptower/e2_test.go | 506 ++++ ecc/bls12-378/internal/fptower/e6.go | 264 ++ ecc/bls12-378/internal/fptower/e6_test.go | 317 ++ ecc/bls12-378/internal/fptower/frobenius.go | 305 ++ .../internal/fptower/generators_test.go | 51 + ecc/bls12-378/marshal.go | 1160 +++++++ ecc/bls12-378/marshal_test.go | 467 +++ ecc/bls12-378/multiexp.go | 2303 ++++++++++++++ ecc/bls12-378/multiexp_test.go | 1349 +++++++++ ecc/bls12-378/pairing.go | 241 ++ ecc/bls12-378/pairing_test.go | 305 ++ ecc/bls12-378/twistededwards/doc.go | 18 + ecc/bls12-378/twistededwards/eddsa/doc.go | 22 + ecc/bls12-378/twistededwards/eddsa/eddsa.go | 265 ++ .../twistededwards/eddsa/eddsa_test.go | 208 ++ 
ecc/bls12-378/twistededwards/eddsa/marshal.go | 133 + ecc/bls12-378/twistededwards/point.go | 411 +++ .../twistededwards/twistededwards_test.go | 456 +++ ecc/ecc.go | 9 +- ...da71edd87ff573bf9ed04a00009948a20000000000 | Bin 0 -> 2295 bytes ...b068524ebfe74bfbb5411600004ca4510000000000 | Bin 0 -> 3532 bytes ...65d630ef0ff69c7b761ffd5cefe7b4128000265228 | Bin 0 -> 1907 bytes ...08c9a60370d83429275ff3a5fddaa08b0000265228 | Bin 0 -> 3158 bytes internal/generator/config/bls12-378.go | 29 + internal/generator/ecc/template/point.go.tmpl | 6 +- 97 files changed, 33786 insertions(+), 6 deletions(-) create mode 100644 ecc/bls12-378/bls12-378.go create mode 100644 ecc/bls12-378/doc.go create mode 100644 ecc/bls12-378/fp/arith.go create mode 100644 ecc/bls12-378/fp/asm.go create mode 100644 ecc/bls12-378/fp/asm_noadx.go create mode 100644 ecc/bls12-378/fp/doc.go create mode 100644 ecc/bls12-378/fp/element.go create mode 100644 ecc/bls12-378/fp/element_exp.go create mode 100644 ecc/bls12-378/fp/element_fuzz.go create mode 100644 ecc/bls12-378/fp/element_mul_adx_amd64.s create mode 100644 ecc/bls12-378/fp/element_mul_amd64.s create mode 100644 ecc/bls12-378/fp/element_ops_amd64.go create mode 100644 ecc/bls12-378/fp/element_ops_amd64.s create mode 100644 ecc/bls12-378/fp/element_ops_noasm.go create mode 100644 ecc/bls12-378/fp/element_test.go create mode 100644 ecc/bls12-378/fr/arith.go create mode 100644 ecc/bls12-378/fr/asm.go create mode 100644 ecc/bls12-378/fr/asm_noadx.go create mode 100644 ecc/bls12-378/fr/doc.go create mode 100644 ecc/bls12-378/fr/element.go create mode 100644 ecc/bls12-378/fr/element_exp.go create mode 100644 ecc/bls12-378/fr/element_fuzz.go create mode 100644 ecc/bls12-378/fr/element_mul_adx_amd64.s create mode 100644 ecc/bls12-378/fr/element_mul_amd64.s create mode 100644 ecc/bls12-378/fr/element_ops_amd64.go create mode 100644 ecc/bls12-378/fr/element_ops_amd64.s create mode 100644 ecc/bls12-378/fr/element_ops_noasm.go create mode 100644 
ecc/bls12-378/fr/element_test.go create mode 100644 ecc/bls12-378/fr/fft/doc.go create mode 100644 ecc/bls12-378/fr/fft/domain.go create mode 100644 ecc/bls12-378/fr/fft/domain_test.go create mode 100644 ecc/bls12-378/fr/fft/fft.go create mode 100644 ecc/bls12-378/fr/fft/fft_test.go create mode 100644 ecc/bls12-378/fr/fft/fuzz.go create mode 100644 ecc/bls12-378/fr/fft/fuzz_test.go create mode 100644 ecc/bls12-378/fr/kzg/doc.go create mode 100644 ecc/bls12-378/fr/kzg/fuzz.go create mode 100644 ecc/bls12-378/fr/kzg/fuzz_test.go create mode 100644 ecc/bls12-378/fr/kzg/kzg.go create mode 100644 ecc/bls12-378/fr/kzg/kzg_test.go create mode 100644 ecc/bls12-378/fr/kzg/marshal.go create mode 100644 ecc/bls12-378/fr/mimc/doc.go create mode 100644 ecc/bls12-378/fr/mimc/fuzz.go create mode 100644 ecc/bls12-378/fr/mimc/mimc.go create mode 100644 ecc/bls12-378/fr/permutation/doc.go create mode 100644 ecc/bls12-378/fr/permutation/permutation.go create mode 100644 ecc/bls12-378/fr/permutation/permutation_test.go create mode 100644 ecc/bls12-378/fr/plookup/doc.go create mode 100644 ecc/bls12-378/fr/plookup/plookup_test.go create mode 100644 ecc/bls12-378/fr/plookup/table.go create mode 100644 ecc/bls12-378/fr/plookup/vector.go create mode 100644 ecc/bls12-378/fr/polynomial/doc.go create mode 100644 ecc/bls12-378/fr/polynomial/polynomial.go create mode 100644 ecc/bls12-378/fr/polynomial/polynomial_test.go create mode 100644 ecc/bls12-378/fuzz.go create mode 100644 ecc/bls12-378/fuzz_test.go create mode 100644 ecc/bls12-378/g1.go create mode 100644 ecc/bls12-378/g1_test.go create mode 100644 ecc/bls12-378/g2.go create mode 100644 ecc/bls12-378/g2_test.go create mode 100644 ecc/bls12-378/hash_to_curve.go create mode 100644 ecc/bls12-378/internal/fptower/asm.go create mode 100644 ecc/bls12-378/internal/fptower/asm_noadx.go create mode 100644 ecc/bls12-378/internal/fptower/e12.go create mode 100644 ecc/bls12-378/internal/fptower/e12_pairing.go create mode 100644 
ecc/bls12-378/internal/fptower/e12_test.go create mode 100644 ecc/bls12-378/internal/fptower/e2.go create mode 100644 ecc/bls12-378/internal/fptower/e2_amd64.go create mode 100644 ecc/bls12-378/internal/fptower/e2_amd64.s create mode 100644 ecc/bls12-378/internal/fptower/e2_bls378.go create mode 100644 ecc/bls12-378/internal/fptower/e2_fallback.go create mode 100644 ecc/bls12-378/internal/fptower/e2_test.go create mode 100644 ecc/bls12-378/internal/fptower/e6.go create mode 100644 ecc/bls12-378/internal/fptower/e6_test.go create mode 100644 ecc/bls12-378/internal/fptower/frobenius.go create mode 100644 ecc/bls12-378/internal/fptower/generators_test.go create mode 100644 ecc/bls12-378/marshal.go create mode 100644 ecc/bls12-378/marshal_test.go create mode 100644 ecc/bls12-378/multiexp.go create mode 100644 ecc/bls12-378/multiexp_test.go create mode 100644 ecc/bls12-378/pairing.go create mode 100644 ecc/bls12-378/pairing_test.go create mode 100644 ecc/bls12-378/twistededwards/doc.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/doc.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/eddsa.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/eddsa_test.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/marshal.go create mode 100644 ecc/bls12-378/twistededwards/point.go create mode 100644 ecc/bls12-378/twistededwards/twistededwards_test.go create mode 100644 internal/generator/addchain/1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 create mode 100644 internal/generator/addchain/1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000 create mode 100644 internal/generator/addchain/41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 create mode 100644 internal/generator/addchain/fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 create mode 100644 internal/generator/config/bls12-378.go diff --git a/ecc/bls12-377/bls12-377.go b/ecc/bls12-377/bls12-377.go 
index e6dce675a..c41f651b7 100644 --- a/ecc/bls12-377/bls12-377.go +++ b/ecc/bls12-377/bls12-377.go @@ -111,7 +111,7 @@ func init() { endo.u.A0.SetString("80949648264912719408558363140637477264845294720710499478137287262712535938301461879813459410946") endo.v.A0.SetString("216465761340224619389371505802605247630151569547285782856803747159100223055385581585702401816380679166954762214499") - // binary decomposition of 15132376222941642752 little endian + // binary decomposition of 9586122913090633729 little endian loopCounter = [64]int8{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1} xGen.SetString("9586122913090633729", 10) diff --git a/ecc/bls12-378/bls12-378.go b/ecc/bls12-378/bls12-378.go new file mode 100644 index 000000000..842c373c6 --- /dev/null +++ b/ecc/bls12-378/bls12-378.go @@ -0,0 +1,126 @@ +package bls12378 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +// E: y**2=x**3+1 +// Etwist: y**2 = x**3+u +// Tower: Fp->Fp2, u**2=-5 -> Fp12, v**6=u +// Generator (BLS12 family): x=11045256207009841153 +// optimal Ate loop: trace(frob)-1=x +// trace of pi: x+1 +// Fp: p=605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +// Fr: r=14883435066912132899950318861128167269793560281114003360875131245101026639873 + +// ID bls378 ID +const ID = ecc.BLS12_378 + +// bCurveCoeff b coeff of the curve +var bCurveCoeff fp.Element + +// bTwistCurveCoeff b coeff of the twist (defined over Fp2) curve +var bTwistCurveCoeff fptower.E2 + +// generators of the r-torsion group, resp. 
in ker(pi-id), ker(Tr) +var g1Gen G1Jac +var g2Gen G2Jac + +var g1GenAff G1Affine +var g2GenAff G2Affine + +// point at infinity +var g1Infinity G1Jac +var g2Infinity G2Jac + +// optimal Ate loop counter (=trace-1 = x in BLS family) +var loopCounter [64]int8 + +// Parameters useful for the GLV scalar multiplication. The third roots define the +// endomorphisms phi1 and phi2 for and . lambda is such that lies above +// in the ring Z[phi]. More concretely it's the associated eigenvalue +// of phi1 (resp phi2) restricted to (resp ) +// cf https://www.cosic.esat.kuleuven.be/nessie/reports/phase2/GLV.pdf +var thirdRootOneG1 fp.Element +var thirdRootOneG2 fp.Element +var lambdaGLV big.Int + +// glvBasis stores R-linearly independant vectors (a,b), (c,d) +// in ker((u,v)->u+vlambda[r]), and their determinant +var glvBasis ecc.Lattice + +// psi o pi o psi**-1, where psi:E->E' is the degree 6 iso defined over Fp12 +var endo struct { + u fptower.E2 + v fptower.E2 +} + +// generator of the curve +var xGen big.Int + +// expose the tower -- github.com/consensys/gnark uses it in a gnark circuit + +// E2 is a degree two finite field extension of fp.Element +type E2 = fptower.E2 + +// E6 is a degree three finite field extension of fp2 +type E6 = fptower.E6 + +// E12 is a degree two finite field extension of fp6 +type E12 = fptower.E12 + +func init() { + + bCurveCoeff.SetUint64(1) + bTwistCurveCoeff.A1.SetUint64(1) // M-twist + + // E(3,y) * cofactor + g1Gen.X.SetString("302027100877540500544138164010696035562809807233645104772290911818386302983750063098216015456036850656714568735197") + g1Gen.Y.SetString("232851047397483214541821965369374725182070455016459237170823497053622811786333462699984177726412751508198874482530") + g1Gen.Z.SetString("1") + + // E'(1,y) * cofactor' + g2Gen.X.SetString("470810816643554779222760025249941413452299198622737082648784137654933833261310635469274149014014206108405592809732", + 
"317092959336227428400228502739777439718827088477410533227996105067347670094088101088421556743730925535231685964487") + g2Gen.Y.SetString("248853758964950314624408411876149087897475217517523838449839260719963153199419627931373025216041741725848318074460", + "389162134924826972299508957175841717907876177152103852864177212390074067430801162403069988146334006672491106545644") + g2Gen.Z.SetString("1", + "0") + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + g1Infinity.X.SetOne() + g1Infinity.Y.SetOne() + g2Infinity.X.SetOne() + g2Infinity.Y.SetOne() + + thirdRootOneG1.SetString("164391353554439166353793911729193406645071739502673898176639736370075683438438023898983435337729") + thirdRootOneG2.Square(&thirdRootOneG1) + lambdaGLV.SetString("121997684678489422961514670190292369408", 10) //(x**2-1) + _r := fr.Modulus() + ecc.PrecomputeLattice(_r, &lambdaGLV, &glvBasis) + + endo.u.A0.SetString("164391353554439166353793911729193406645071739502673898176639736370075683438438023898983435337730") + endo.v.A0.SetString("595603361117066405543541008735167904222384847192046901135681663787023479658010166685728902742824780272831835669219") + + // binary decomposition of 11045256207009841153 little endian + loopCounter = [64]int8{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1} + + xGen.SetString("11045256207009841153", 10) + +} + +// Generators return the generators of the r-torsion group, resp. in ker(pi-id), ker(Tr) +func Generators() (g1Jac G1Jac, g2Jac G2Jac, g1Aff G1Affine, g2Aff G2Affine) { + g1Aff = g1GenAff + g2Aff = g2GenAff + g1Jac = g1Gen + g2Jac = g2Gen + return +} diff --git a/ecc/bls12-378/doc.go b/ecc/bls12-378/doc.go new file mode 100644 index 000000000..cd73fedbd --- /dev/null +++ b/ecc/bls12-378/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package bls12378 efficient elliptic curve and pairing implementation for bls12-378. +package bls12378 diff --git a/ecc/bls12-378/fp/arith.go b/ecc/bls12-378/fp/arith.go new file mode 100644 index 000000000..66fa66748 --- /dev/null +++ b/ecc/bls12-378/fp/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "math/bits" +) + +// madd0 hi = a*b + c (discards lo bits) +func madd0(a, b, c uint64) (hi uint64) { + var carry, lo uint64 + hi, lo = bits.Mul64(a, b) + _, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd1 hi, lo = a*b + c +func madd1(a, b, c uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd2 hi, lo = a*b + c + d +func madd2(a, b, c, d uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, e, carry) + return +} diff --git a/ecc/bls12-378/fp/asm.go b/ecc/bls12-378/fp/asm.go new file mode 100644 index 000000000..7344271eb --- /dev/null +++ b/ecc/bls12-378/fp/asm.go @@ -0,0 +1,24 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bls12-378/fp/asm_noadx.go b/ecc/bls12-378/fp/asm_noadx.go new file mode 100644 index 000000000..ae778bd3a --- /dev/null +++ b/ecc/bls12-378/fp/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bls12-378/fp/doc.go b/ecc/bls12-378/fp/doc.go new file mode 100644 index 000000000..dd844b5dc --- /dev/null +++ b/ecc/bls12-378/fp/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fp contains field arithmetic operations for modulus = 0x3eeb04...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. +// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [6]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f76a822c00009948a20000000001 // base 16 +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 // base 10 +package fp diff --git a/ecc/bls12-378/fp/element.go b/ecc/bls12-378/fp/element.go new file mode 100644 index 000000000..69e071293 --- /dev/null +++ b/ecc/bls12-378/fp/element.go @@ -0,0 +1,1720 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 6 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +type Element [6]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 6 + +// Bits number bits needed to represent Element +const Bits = 378 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 11045256207009841153 +const qElementWord1 uint64 = 14886639130118979584 +const qElementWord2 uint64 = 10956628289047010687 +const qElementWord3 uint64 = 9513184293603517222 +const qElementWord4 uint64 = 6038022134869067682 +const qElementWord5 uint64 = 283357621510263184 + +var qElement = Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, +} + +// Used for Montgomery reduction. 
(qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 11045256207009841151 + +// rSquare +var rSquare = Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + z[4] = x[4] + z[5] = x[5] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + 
case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) + } +} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + z[4] = 0 + z[5] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 1481365419032838079 + z[1] = 10045892448872562649 + z[2] = 7242180086616818316 + z[3] = 8832319421896135475 + z[4] = 13356930855120736188 + z[5] = 28498675542444634 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 6 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[5] == x[5]) && (z[4] == x[4]) && (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[5] > _x[5] { + return 1 + } else if _z[5] < _x[5] { + return -1 + } + if _z[4] > _x[4] { + return 1 + } else if _z[4] < _x[4] { + return -1 + } + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 5522628103504920577, 0) + _, b = bits.Sub64(_z[1], 16666691601914265600, b) + _, b = bits.Sub64(_z[2], 5478314144523505343, b) + _, b = bits.Sub64(_z[3], 4756592146801758611, b) + _, b = bits.Sub64(_z[4], 3019011067434533841, b) + _, b = bits.Sub64(_z[5], 141678810755131592, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [48]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[4] = binary.BigEndian.Uint64(bytes[32:40]) + z[5] = binary.BigEndian.Uint64(bytes[40:48]) + z[5] %= 
283357621510263184 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + + return z, nil +} + +// One returns 1 (in montgommery form) +func One() Element { + var one Element + one.SetOne() + return one +} + +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 + + // z = z + q + z[0], carry = bits.Add64(z[0], 11045256207009841153, 0) + z[1], carry = bits.Add64(z[1], 14886639130118979584, carry) + z[2], carry = bits.Add64(z[2], 10956628289047010687, carry) + z[3], carry = bits.Add64(z[3], 9513184293603517222, carry) + z[4], carry = bits.Add64(z[4], 6038022134869067682, carry) + z[5], _ = bits.Add64(z[5], 283357621510263184, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] = z[3]>>1 | z[4]<<63 + z[4] = z[4]>>1 | z[5]<<63 + z[5] >>= 1 + +} + +// API with assembly impl + +// Mul z = x * y mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Mul(x, y *Element) *Element { + mul(z, x, y) + return z +} + +// Square z = x * x mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Square(x *Element) *Element { + mul(z, x, x) + return z +} + +// FromMont converts z in place (i.e. 
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func (z *Element) FromMont() *Element { + fromMont(z) + return z +} + +// Add z = x + y mod q +func (z *Element) Add(x, y *Element) *Element { + add(z, x, y) + return z +} + +// Double z = x + x mod q, aka Lsh 1 +func (z *Element) Double(x *Element) *Element { + double(z, x) + return z +} + +// Sub z = x - y mod q +func (z *Element) Sub(x, y *Element) *Element { + sub(z, x, y) + return z +} + +// Neg z = q - x +func (z *Element) Neg(x *Element) *Element { + neg(z, x) + return z +} + +// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms + +func _mulGeneric(z, x, y *Element) { + + var t [6]uint64 + var c [3]uint64 + { + // round 0 + v := x[0] + c[1], c[0] = bits.Mul64(v, y[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd1(v, y[1], c[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd1(v, y[2], c[1]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd1(v, y[3], c[1]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd1(v, y[4], c[1]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd1(v, y[5], c[1]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 1 + v := x[1] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, 
c[0], c[2], c[1]) + } + { + // round 2 + v := x[2] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 3 + v := x[3] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 4 + v := x[4] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 5 + v 
:= x[5] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], z[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], z[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], z[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], z[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + z[5], z[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [6]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 14886639130118979584, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 10956628289047010687, c2, c0) + c1, c0 = madd1(y, x[3], c1) + c2, t[2] = madd2(m, 9513184293603517222, c2, c0) + c1, c0 = madd1(y, x[4], c1) + c2, t[3] = madd2(m, 6038022134869067682, c2, c0) + c1, c0 = madd1(y, x[5], c1) + t[5], t[4] = madd3(m, 283357621510263184, c0, 
c2, c1) + } + { + // round 1 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 2 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 3 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 4 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 5 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, z[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, z[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, z[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, z[3] = madd2(m, 6038022134869067682, c2, t[4]) + z[5], z[4] = madd2(m, 283357621510263184, t[5], c2) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 
6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _fromMontGeneric(z *Element) { + // the following lines implement z = z * 1 + // with a modified CIOS montgomery multiplication + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 
11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _addGeneric(z, x, y *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], y[0], 0) + z[1], carry = bits.Add64(x[1], y[1], carry) + z[2], carry = 
bits.Add64(x[2], y[2], carry) + z[3], carry = bits.Add64(x[3], y[3], carry) + z[4], carry = bits.Add64(x[4], y[4], carry) + z[5], _ = bits.Add64(x[5], y[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _doubleGeneric(z, x *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], x[0], 0) + z[1], carry = bits.Add64(x[1], x[1], carry) + z[2], carry = bits.Add64(x[2], x[2], carry) + z[3], carry = bits.Add64(x[3], x[3], carry) + z[4], carry = bits.Add64(x[4], x[4], carry) + z[5], _ = bits.Add64(x[5], x[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = 
bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _subGeneric(z, x, y *Element) { + var b uint64 + z[0], b = bits.Sub64(x[0], y[0], 0) + z[1], b = bits.Sub64(x[1], y[1], b) + z[2], b = bits.Sub64(x[2], y[2], b) + z[3], b = bits.Sub64(x[3], y[3], b) + z[4], b = bits.Sub64(x[4], y[4], b) + z[5], b = bits.Sub64(x[5], y[5], b) + if b != 0 { + var c uint64 + z[0], c = bits.Add64(z[0], 11045256207009841153, 0) + z[1], c = bits.Add64(z[1], 14886639130118979584, c) + z[2], c = bits.Add64(z[2], 10956628289047010687, c) + z[3], c = bits.Add64(z[3], 9513184293603517222, c) + z[4], c = bits.Add64(z[4], 6038022134869067682, c) + z[5], _ = bits.Add64(z[5], 283357621510263184, c) + } +} + +func _negGeneric(z, x *Element) { + if x.IsZero() { + z.SetZero() + return + } + var borrow uint64 + z[0], borrow = bits.Sub64(11045256207009841153, x[0], 0) + z[1], borrow = bits.Sub64(14886639130118979584, x[1], borrow) + z[2], borrow = bits.Sub64(10956628289047010687, x[2], borrow) + z[3], borrow = bits.Sub64(9513184293603517222, x[3], borrow) + z[4], borrow = bits.Sub64(6038022134869067682, x[4], borrow) + z[5], _ = bits.Sub64(283357621510263184, x[5], borrow) +} + +func _reduceGeneric(z *Element) { + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func mulByConstant(z 
*Element, c uint8) { + switch c { + case 0: + z.SetZero() + return + case 1: + return + case 2: + z.Double(z) + return + case 3: + _z := *z + z.Double(z).Add(z, &_z) + case 5: + _z := *z + z.Double(z).Double(z).Add(z, &_z) + default: + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[5] != 0 { + return 320 + bits.Len64(z[5]) + } + if z[4] != 0 { + return 256 + bits.Len64(z[4]) + } + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + +// Exp z = x^exponent mod q +func (z *Element) Exp(x Element, exponent *big.Int) *Element { + var bZero big.Int + if exponent.Cmp(&bZero) == 0 { + return z.SetOne() + } + + z.Set(&x) + + for i := exponent.BitLen() - 2; i >= 0; i-- { + z.Square(z) + if exponent.Bit(i) == 1 { + z.Mul(z, &x) + } + } + + return z +} + +// ToMont converts z to Montgomery form +// sets and returns z = z * r² +func (z *Element) ToMont() *Element { + return z.Mul(z, &rSquare) +} + +// ToRegular returns z in regular form (doesn't mutate z) +func (z Element) ToRegular() 
Element { + return *z.FromMont() +} + +// String returns the decimal representation of z as generated by +// z.Text(10). +func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[40:48], z[0]) + binary.BigEndian.PutUint64(b[32:40], z[1]) + binary.BigEndian.PutUint64(b[24:32], z[2]) + binary.BigEndian.PutUint64(b[16:24], z[3]) + binary.BigEndian.PutUint64(b[8:16], z[4]) + binary.BigEndian.PutUint64(b[0:8], z[5]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. 
+func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[40:48], _z[0]) + binary.BigEndian.PutUint64(res[32:40], _z[1]) + binary.BigEndian.PutUint64(res[24:32], _z[2]) + binary.BigEndian.PutUint64(res[16:24], _z[3]) + binary.BigEndian.PutUint64(res[8:16], _z[4]) + binary.BigEndian.PutUint64(res[0:8], _z[5]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. +func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix 
determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil +} + +// Legendre returns the Legendre symbol of z (either +1, -1, or 0.) +func (z *Element) Legendre() int { + var l Element + // z^((q-1)/2) + l.expByLegendreExp(*z) + + if l.IsZero() { + return 0 + } + + // if l == 1 + if (l[5] == 28498675542444634) && (l[4] == 13356930855120736188) && (l[3] == 8832319421896135475) && (l[2] == 7242180086616818316) && (l[1] == 10045892448872562649) && (l[0] == 1481365419032838079) { + return 1 + } + return -1 +} + +// Sqrt z = √x mod q +// if the square root doesn't exist (x is not a square mod q) +// Sqrt leaves z unchanged and returns nil +func (z *Element) Sqrt(x *Element) *Element { + // q ≡ 1 (mod 4) + // see modSqrtTonelliShanks in math/big/int.go + // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + + var y, b, t, w Element + // w = x^((s-1)/2)) + w.expBySqrtExp(*x) + + // y = x^((s+1)/2)) = w * x + y.Mul(x, &w) + + // b = x^s = w * w * x = y * x + b.Mul(&w, &y) + + // g = nonResidue ^ s + var g = Element{ + 15655215628902554004, + 15894127656167592378, + 9702012166408397168, + 12335982559306940759, + 1313802173610541430, + 81629743607937133, + } + r := uint64(41) + + // compute legendre symbol + // t = x^((q-1)/2) = r-1 squaring of x^s + t = b + for i := uint64(0); i < r-1; i++ { + t.Square(&t) + } + if t.IsZero() { + return z.SetZero() + } + if !((t[5] 
== 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) { + // t != 1, we don't have a square root + return nil + } + for { + var m uint64 + t = b + + // for t != 1 + for !((t[5] == 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) { + t.Square(&t) + m++ + } + + if m == 0 { + return z.Set(&y) + } + // t = g^(2^(r-m-1)) mod q + ge := int(r - m - 1) + t = g + for ge > 0 { + t.Square(&t) + ge-- + } + + g.Square(&t) + y.Mul(&y, &t) + b.Mul(&b, &g) + r = m + } +} + +func max(a int, b int) int { + if a > b { + return a + } + return b +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 851295657643717122 +const inversionCorrectionFactorWord1 = 10857859049187504913 +const inversionCorrectionFactorWord2 = 7148604188520083019 +const inversionCorrectionFactorWord3 = 1138623559447261654 +const inversionCorrectionFactorWord4 = 1203095380280779597 +const inversionCorrectionFactorWord5 = 148579538565968037 + +const invIterationsN = 26 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x 
*Element) *Element { + if x.IsZero() { + z.SetZero() + return z + } + + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + } // b := q + + u := Element{1} + + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 + + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still + } + + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) + a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << 
approxHighBitsN) + a[5] = (a[5] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) + b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) + b[5] = (b[5] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) + + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost 
and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 + + m := x[0] * qInvNegLsw + + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + C, t[4] = madd2(m, qElementWord4, x[4], C) + C, t[5] = madd2(m, qElementWord5, x[5], C) + + // the high word of m * qElement[5] is at most 62 bits + // x[5] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[6] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, 
t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 3 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 4 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 5 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + C, z[2] = madd2(m, qElementWord3, t[i+3], C) + C, z[3] = madd2(m, qElementWord4, t[i+4], C) + z[5], z[4] = madd2(m, qElementWord5, t[i+5], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] 
== 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + z[4], b = bits.Sub64(z[4], 0, b) + z[5], b = bits.Sub64(z[5], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[5] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], b = bits.Add64(z[3], qElementWord3, b) + z[4], b = bits.Add64(z[4], qElementWord4, b) + z[5], _ = bits.Add64(neg1, qElementWord5, b) + } + } +} + +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} + +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 + + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + z[4], b = bits.Sub64(0, x[4], b) + z[5], b = bits.Sub64(0, x[5], b) + xHi, _ = bits.Sub64(0, xHi, b) + + return xHi +} + +// regular multiplication by one word regular (non montgomery) +// Fewer additions than the branch-free for positive y. 
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { + + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) + + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) + c, z[4] = madd1(x[4], w, c) + c, z[5] = madd1(x[5], w, c) + + if y < 0 { + c = z.neg(z, c) + } + + return c +} + +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { + + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w + + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 4 - 1 + + h[curI], s[curI] = bits.Mul64(x[4], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 5 % 2 + const prevI 
= 1 - curI + const iMinusOne = 5 - 1 + + h[curI], s[curI] = bits.Mul64(x[5], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + { + const curI = 6 % 2 + const prevI = 1 - curI + const iMinusOne = 5 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c + } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + z[4], carry = bits.Add64(z[4], yTimes[4], carry) + z[5], carry = bits.Add64(z[5], yTimes[5], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + + return yHi +} diff --git a/ecc/bls12-378/fp/element_exp.go b/ecc/bls12-378/fp/element_exp.go new file mode 100644 index 000000000..68439a4d7 --- /dev/null +++ b/ecc/bls12-378/fp/element_exp.go @@ -0,0 +1,1040 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// expBySqrtExp is equivalent to z.Exp(x, fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) << 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // return ((_101001 + i386) << 6 + _101) << 3 + // + // 
Operations: 330 squares 67 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = 
x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; 
s++ { + t15.Square(t15) + } + + // Step 150: t15 = x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } + + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 
303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 397: z = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 + for s := 0; s < 3; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) 
<< 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // i399 = ((_101001 + i386) << 6 + _101) << 4 + 1 + // return i399 << 40 + // + // Operations: 371 squares 68 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + 
for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 150: t15 = 
x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } 
+ + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // 
Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 398: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca450 + for s := 0; s < 4; s++ { + z.Square(z) + } + + // Step 399: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca451 + z.Mul(&x, z) + + // Step 439: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000 + for s := 0; s < 40; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bls12-378/fp/element_fuzz.go b/ecc/bls12-378/fp/element_fuzz.go new file mode 100644 index 000000000..0d948021a --- /dev/null +++ b/ecc/bls12-378/fp/element_fuzz.go @@ -0,0 +1,152 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[5] %= qElement[5] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + z[4], b = bits.Sub64(z[4], qElement[4], b) + z[5], b = bits.Sub64(z[5], qElement[5], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + 
if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/ecc/bls12-378/fp/element_mul_adx_amd64.s b/ecc/bls12-378/fp/element_mul_adx_amd64.s new file mode 100644 index 000000000..a6f902c36 --- /dev/null +++ b/ecc/bls12-378/fp/element_mul_adx_amd64.s @@ -0,0 +1,836 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), NOSPLIT, $0-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 
32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + 
ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + 
MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + 
// (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp 
registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +TEXT ·fromMont(SB), NOSPLIT, $0-8 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ 
DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + 
ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET diff --git a/ecc/bls12-378/fp/element_mul_amd64.s b/ecc/bls12-378/fp/element_mul_amd64.s new file mode 100644 index 000000000..171a75360 --- /dev/null +++ b/ecc/bls12-378/fp/element_mul_amd64.s @@ -0,0 +1,858 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $24-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + 
A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + 
ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + 
// (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), 
DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ 
q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $8-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 
+ ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ 
R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bls12-378/fp/element_ops_amd64.go b/ecc/bls12-378/fp/element_ops_amd64.go new file mode 100644 index 000000000..73a3711ec --- /dev/null +++ b/ecc/bls12-378/fp/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bls12-378/fp/element_ops_amd64.s b/ecc/bls12-378/fp/element_ops_amd64.s new file mode 100644 index 000000000..97da07d77 --- /dev/null +++ b/ecc/bls12-378/fp/element_ops_amd64.s @@ -0,0 +1,452 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// add(res, x, y *Element) +TEXT ·add(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 40(DX), R9 + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ res+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + XORQ R9, R9 + MOVQ x+8(FP), R8 + MOVQ 0(R8), AX + MOVQ 8(R8), DX + MOVQ 16(R8), CX + MOVQ 24(R8), BX + MOVQ 32(R8), SI + MOVQ 40(R8), DI + MOVQ y+16(FP), R8 + SUBQ 0(R8), AX + SBBQ 8(R8), DX + SBBQ 16(R8), CX + SBBQ 24(R8), BX + SBBQ 32(R8), SI + SBBQ 40(R8), DI + MOVQ $0x9948a20000000001, R10 + MOVQ $0xce97f76a822c0000, R11 + MOVQ $0x980dc360d0a49d7f, R12 + MOVQ $0x84059eb647102326, R13 + MOVQ $0x53cb5d240ed107a2, R14 + MOVQ 
$0x03eeb0416684d190, R15 + CMOVQCC R9, R10 + CMOVQCC R9, R11 + CMOVQCC R9, R12 + CMOVQCC R9, R13 + CMOVQCC R9, R14 + CMOVQCC R9, R15 + ADDQ R10, AX + ADCQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + ADCQ R15, DI + MOVQ res+0(FP), R8 + MOVQ AX, 0(R8) + MOVQ DX, 8(R8) + MOVQ CX, 16(R8) + MOVQ BX, 24(R8) + MOVQ SI, 32(R8) + MOVQ DI, 40(R8) + RET + +// double(res, x *Element) +TEXT ·double(SB), NOSPLIT, $0-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ res+0(FP), R15 + MOVQ DX, 0(R15) + MOVQ CX, 8(R15) + MOVQ BX, 16(R15) + MOVQ SI, 24(R15) + MOVQ DI, 32(R15) + MOVQ R8, 40(R15) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), R9 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + TESTQ AX, AX + JEQ l1 + MOVQ $0x9948a20000000001, R10 + SUBQ DX, R10 + MOVQ R10, 0(R9) + MOVQ $0xce97f76a822c0000, R10 + SBBQ CX, R10 + MOVQ R10, 8(R9) + MOVQ $0x980dc360d0a49d7f, R10 + SBBQ BX, R10 + MOVQ R10, 16(R9) + MOVQ $0x84059eb647102326, R10 + SBBQ SI, R10 + MOVQ R10, 24(R9) + MOVQ $0x53cb5d240ed107a2, R10 + SBBQ DI, R10 + MOVQ R10, 32(R9) + MOVQ $0x03eeb0416684d190, R10 + SBBQ R8, R10 + MOVQ R10, 40(R9) + RET + +l1: + MOVQ AX, 0(R9) + MOVQ AX, 8(R9) + MOVQ AX, 16(R9) + MOVQ AX, 24(R9) + MOVQ AX, 32(R9) + MOVQ AX, 40(R9) + RET + +TEXT ·reduce(SB), NOSPLIT, $0-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + 
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12) + REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), $40-8 + MOVQ x+0(FP), AX + MOVQ 
0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + + MOVQ DX, R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ R15, DX + ADCQ s0-8(SP), CX + ADCQ s1-16(SP), BX + ADCQ s2-24(SP), SI + ADCQ s3-32(SP), DI + ADCQ s4-40(SP), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), $48-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ CX, R10 + MOVQ BX, R11 + MOVQ SI, R12 + MOVQ DI, R13 + MOVQ R8, R14 + MOVQ R9, R15 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 
40(DX), R9 + SUBQ 0(DX), R10 + SBBQ 8(DX), R11 + SBBQ 16(DX), R12 + SBBQ 24(DX), R13 + SBBQ 32(DX), R14 + SBBQ 40(DX), R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + MOVQ R9, s5-48(SP) + MOVQ $0x9948a20000000001, CX + MOVQ $0xce97f76a822c0000, BX + MOVQ $0x980dc360d0a49d7f, SI + MOVQ $0x84059eb647102326, DI + MOVQ $0x53cb5d240ed107a2, R8 + MOVQ $0x03eeb0416684d190, R9 + CMOVQCC AX, CX + CMOVQCC AX, BX + CMOVQCC AX, SI + CMOVQCC AX, DI + CMOVQCC AX, R8 + CMOVQCC AX, R9 + ADDQ CX, R10 + ADCQ BX, R11 + ADCQ SI, R12 + ADCQ DI, R13 + ADCQ R8, R14 + ADCQ R9, R15 + MOVQ s0-8(SP), CX + MOVQ s1-16(SP), BX + MOVQ s2-24(SP), SI + MOVQ s3-32(SP), DI + MOVQ s4-40(SP), R8 + MOVQ s5-48(SP), R9 + MOVQ R10, 0(DX) + MOVQ R11, 8(DX) + MOVQ R12, 16(DX) + MOVQ R13, 24(DX) + MOVQ R14, 32(DX) + MOVQ R15, 40(DX) + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET diff --git a/ecc/bls12-378/fp/element_ops_noasm.go b/ecc/bls12-378/fp/element_ops_noasm.go new file mode 100644 index 000000000..fec628918 --- /dev/null +++ b/ecc/bls12-378/fp/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bls12-378/fp/element_test.go b/ecc/bls12-378/fp/element_test.go new file mode 100644 index 000000000..72b71ebc5 --- /dev/null +++ b/ecc/bls12-378/fp/element_test.go @@ -0,0 +1,2681 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement.SetOne() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 
13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 200 + nbFuzz = 1000 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[5]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[5]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + 
+func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := 
gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a 
testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx 
= false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, 
&b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + 
properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000", 16) + const sqrtExponentElement = "fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match Exp(1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + 
c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with 
int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := 
a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + 
err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + 
var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], carry = bits.Add64(a[3], qElement[3], carry) + a[4], carry = bits.Add64(a[4], qElement[4], carry) + a[5], _ = bits.Add64(a[5], qElement[5], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. 
Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. 
+func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } 
//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen
func TestUpdateFactorSubtraction(t *testing.T) {
	for i := 0; i < 1000; i++ {

		f0, g0 := randomizeUpdateFactors()
		f1, g1 := randomizeUpdateFactors()

		// Shrink f1 (resp. g1) until the difference fits in the packed
		// 32-bit window (-2³¹, 2³¹]; otherwise the composed subtraction
		// below would overflow its half of the int64.
		for f0-f1 > 1<<31 || f0-f1 <= -1<<31 {
			f1 /= 2
		}

		for g0-g1 > 1<<31 || g0-g1 <= -1<<31 {
			g1 /= 2
		}

		c0 := updateFactorsCompose(f0, g0)
		c1 := updateFactorsCompose(f1, g1)

		// Subtracting the packed values must agree with component-wise
		// subtraction of the (f, g) pairs.
		cRes := c0 - c1
		fRes, gRes := updateFactorsDecompose(cRes)

		if fRes != f0-f1 || gRes != g0-g1 {
			t.Error(i)
		}
	}
}
Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func 
// randomizeUpdateFactor draws a pseudo-random update factor in
// [-absLimit, absLimit]. The two extremes are deliberately over-weighted
// (each chosen with probability 1/10) so that edge cases are exercised
// often. The result is additionally clamped to the representable window
// (-2³¹, 2³¹].
func randomizeUpdateFactor(absLimit uint32) int64 {
	const extremeLikelihood = 10
	draw := mrand.Intn(extremeLikelihood)

	limit := int64(absLimit)
	var v int64
	if draw == 0 {
		v = limit
	} else if draw == 1 {
		v = -limit
	} else {
		v = int64(mrand.Uint64()%(2*uint64(limit)+1)) - limit
	}

	if v > 1<<31 {
		return 1 << 31
	}
	if v < -1<<31+1 {
		return -1<<31 + 1
	}
	return v
}

// abs64T32 returns |f| as a uint32. It panics when |f| is too large to
// fit, i.e. when f ≥ 2³² or f < -2³².
func abs64T32(f int64) uint32 {
	if f >= 1<<32 || f < -1<<32 {
		panic("f out of range")
	}
	if f < 0 {
		f = -f
	}
	return uint32(f)
}
// updateFactorsCompose packs the update-factor pair (f, g) into one int64:
// f occupies the low 32 bits and g the high 32 bits. The test
// TestUpdateFactorDecomposition checks that updateFactorsDecompose
// recovers the pair.
func updateFactorsCompose(f int64, g int64) int64 {
	return (g << 32) + f
}
// approximateRef is a big.Int-based reference implementation used to check
// the fast approximate routine: when x fits in 64 bits it returns x itself;
// otherwise it packs the 33 most-significant bits and the 31
// least-significant bits of x into a single uint64.
func approximateRef(x *Element) uint64 {

	var asInt big.Int
	x.ToBigInt(&asInt)
	n := x.BitLen()

	if n <= 64 {
		return asInt.Uint64()
	}

	// lo = x mod 2³¹ — the 31 low bits
	modulus := big.NewInt(1 << 31)
	var lo big.Int
	lo.Mod(&asInt, modulus)

	// hi = x >> (n-33), then shifted up to sit above lo — the 33 high bits
	modulus.Lsh(modulus, uint(n-64))
	var hi big.Int
	hi.Div(&asInt, modulus)
	hi.Lsh(&hi, 31)

	hi.Add(&hi, &lo)
	return hi.Uint64()
}
// madd0 returns the high word of a*b + c; the low word is discarded
// (only its carry into the high word is kept).
func madd0(a, b, c uint64) (hi uint64) {
	high, low := bits.Mul64(a, b)
	_, overflow := bits.Add64(low, c, 0)
	hi, _ = bits.Add64(high, 0, overflow)
	return hi
}

// madd1 returns hi, lo such that hi·2⁶⁴ + lo = a*b + c.
func madd1(a, b, c uint64) (hi uint64, lo uint64) {
	high, low := bits.Mul64(a, b)
	var carry uint64
	lo, carry = bits.Add64(low, c, 0)
	hi, _ = bits.Add64(high, 0, carry)
	return hi, lo
}

// madd2 returns hi, lo such that hi·2⁶⁴ + lo = a*b + c + d.
func madd2(a, b, c, d uint64) (hi uint64, lo uint64) {
	high, low := bits.Mul64(a, b)
	sum, carry := bits.Add64(c, d, 0)
	high, _ = bits.Add64(high, 0, carry)
	lo, carry = bits.Add64(low, sum, 0)
	hi, _ = bits.Add64(high, 0, carry)
	return hi, lo
}

// madd3 returns hi, lo such that hi·2⁶⁴ + lo = a*b + c + d + e·2⁶⁴
// (e is folded directly into the high word).
func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) {
	high, low := bits.Mul64(a, b)
	sum, carry := bits.Add64(c, d, 0)
	high, _ = bits.Add64(high, 0, carry)
	lo, carry = bits.Add64(low, sum, 0)
	hi, _ = bits.Add64(high, e, carry)
	return hi, lo
}
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bls12-378/fr/asm_noadx.go b/ecc/bls12-378/fr/asm_noadx.go new file mode 100644 index 000000000..221beab93 --- /dev/null +++ b/ecc/bls12-378/fr/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bls12-378/fr/doc.go b/ecc/bls12-378/fr/doc.go new file mode 100644 index 000000000..2425cb964 --- /dev/null +++ b/ecc/bls12-378/fr/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fr contains field arithmetic operations for modulus = 0x20e7b9...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. +// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [4]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da09400013291440000000001 // base 16 +// 14883435066912132899950318861128167269793560281114003360875131245101026639873 // base 10 +package fr diff --git a/ecc/bls12-378/fr/element.go b/ecc/bls12-378/fr/element.go new file mode 100644 index 000000000..b84a12c1b --- /dev/null +++ b/ecc/bls12-378/fr/element.go @@ -0,0 +1,1466 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 4 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 14883435066912132899950318861128167269793560281114003360875131245101026639873 +type Element [4]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 4 + +// Bits number bits needed to represent Element +const Bits = 254 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 14883435066912132899950318861128167269793560281114003360875131245101026639873 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 3643768340310130689 +const qElementWord1 uint64 = 16926637627159085057 +const qElementWord2 uint64 = 9761692607219216639 +const qElementWord3 uint64 = 2371068001496280753 + +var qElement = Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, +} + +// Used for Montgomery reduction. 
(qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 3643768340310130687 + +// rSquare +var rSquare = Element{ + 1260465344847950704, + 15627634503313390135, + 1085346480195626314, + 405261321576397495, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("14883435066912132899950318861128167269793560281114003360875131245101026639873", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + 
return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fr.Element from type " + reflect.TypeOf(i1).String()) + } +} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 11387109765248188409 + z[1] = 10640745125853265911 + z[2] = 5455128044303689984 + z[3] = 1849268063235586341 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 4 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 11045256207009841153, 0) + _, b = bits.Sub64(_z[1], 17686690850434318336, b) + _, b = bits.Sub64(_z[2], 14104218340464384127, b) + _, b = bits.Sub64(_z[3], 1185534000748140376, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [32]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[3] %= 2371068001496280753 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 
// One returns 1 (in montgommery form)
func One() Element {
	var one Element
	one.SetOne()
	return one
}

// Halve sets z to z / 2 (mod p)
func (z *Element) Halve() {
	if z[0]&1 == 1 {
		var carry uint64

		// z is odd: adding q (whose low word is odd) makes the sum even,
		// so the right shift below divides exactly while staying in the
		// same residue class.
		// z = z + q
		z[0], carry = bits.Add64(z[0], 3643768340310130689, 0)
		z[1], carry = bits.Add64(z[1], 16926637627159085057, carry)
		z[2], carry = bits.Add64(z[2], 9761692607219216639, carry)
		z[3], _ = bits.Add64(z[3], 2371068001496280753, carry)

	}

	// z = z >> 1 — one-bit right shift across the four limbs
	z[0] = z[0]>>1 | z[1]<<63
	z[1] = z[1]>>1 | z[2]<<63
	z[2] = z[2]>>1 | z[3]<<63
	z[3] >>= 1

}

// API with assembly impl

// Mul z = x * y mod q
// see https://hackmd.io/@zkteam/modular_multiplication
// Dispatches to the package-level mul (platform-specific implementation;
// see the element_ops files).
func (z *Element) Mul(x, y *Element) *Element {
	mul(z, x, y)
	return z
}

// Square z = x * x mod q
// see https://hackmd.io/@zkteam/modular_multiplication
// Implemented as a multiplication of x by itself via the same mul dispatch.
func (z *Element) Square(x *Element) *Element {
	mul(z, x, x)
	return z
}
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func (z *Element) FromMont() *Element { + fromMont(z) + return z +} + +// Add z = x + y mod q +func (z *Element) Add(x, y *Element) *Element { + add(z, x, y) + return z +} + +// Double z = x + x mod q, aka Lsh 1 +func (z *Element) Double(x *Element) *Element { + double(z, x) + return z +} + +// Sub z = x - y mod q +func (z *Element) Sub(x, y *Element) *Element { + sub(z, x, y) + return z +} + +// Neg z = q - x +func (z *Element) Neg(x *Element) *Element { + neg(z, x) + return z +} + +// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms + +func _mulGeneric(z, x, y *Element) { + + var t [4]uint64 + var c [3]uint64 + { + // round 0 + v := x[0] + c[1], c[0] = bits.Mul64(v, y[0]) + m := c[0] * 3643768340310130687 + c[2] = madd0(m, 3643768340310130689, c[0]) + c[1], c[0] = madd1(v, y[1], c[1]) + c[2], t[0] = madd2(m, 16926637627159085057, c[2], c[0]) + c[1], c[0] = madd1(v, y[2], c[1]) + c[2], t[1] = madd2(m, 9761692607219216639, c[2], c[0]) + c[1], c[0] = madd1(v, y[3], c[1]) + t[3], t[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1]) + } + { + // round 1 + v := x[1] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 3643768340310130687 + c[2] = madd0(m, 3643768340310130689, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 16926637627159085057, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 9761692607219216639, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + t[3], t[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1]) + } + { + // round 2 + v := x[2] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 3643768340310130687 + c[2] = madd0(m, 3643768340310130689, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 16926637627159085057, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 9761692607219216639, c[2], c[0]) + c[1], c[0] = madd2(v, 
y[3], c[1], t[3]) + t[3], t[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1]) + } + { + // round 3 + v := x[3] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 3643768340310130687 + c[2] = madd0(m, 3643768340310130689, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], z[0] = madd2(m, 16926637627159085057, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], z[1] = madd2(m, 9761692607219216639, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + z[3], z[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1]) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [4]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 3643768340310130687 + c2 := madd0(m, 3643768340310130689, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 16926637627159085057, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 9761692607219216639, c2, c0) + c1, c0 = madd1(y, x[3], c1) + t[3], t[2] = madd3(m, 2371068001496280753, c0, c2, c1) + } + { + // round 1 + m := t[0] * 3643768340310130687 + c2 := madd0(m, 3643768340310130689, t[0]) + c2, t[0] = madd2(m, 16926637627159085057, c2, t[1]) + c2, t[1] = madd2(m, 9761692607219216639, c2, t[2]) + t[3], t[2] = madd2(m, 2371068001496280753, t[3], c2) + } + { + // round 2 + m := t[0] * 3643768340310130687 + c2 := madd0(m, 3643768340310130689, t[0]) + c2, t[0] = madd2(m, 16926637627159085057, c2, t[1]) + c2, t[1] = madd2(m, 9761692607219216639, c2, t[2]) + t[3], t[2] = madd2(m, 
2371068001496280753, t[3], c2) + } + { + // round 3 + m := t[0] * 3643768340310130687 + c2 := madd0(m, 3643768340310130689, t[0]) + c2, z[0] = madd2(m, 16926637627159085057, c2, t[1]) + c2, z[1] = madd2(m, 9761692607219216639, c2, t[2]) + z[3], z[2] = madd2(m, 2371068001496280753, t[3], c2) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } +} + +func _fromMontGeneric(z *Element) { + // the following lines implement z = z * 1 + // with a modified CIOS montgomery multiplication + { + // m = z[0]n'[0] mod W + m := z[0] * 3643768340310130687 + C := madd0(m, 3643768340310130689, z[0]) + C, z[0] = madd2(m, 16926637627159085057, z[1], C) + C, z[1] = madd2(m, 9761692607219216639, z[2], C) + C, z[2] = madd2(m, 2371068001496280753, z[3], C) + z[3] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 3643768340310130687 + C := madd0(m, 3643768340310130689, z[0]) + C, z[0] = madd2(m, 16926637627159085057, z[1], C) + C, z[1] = madd2(m, 9761692607219216639, z[2], C) + C, z[2] = madd2(m, 2371068001496280753, z[3], C) + z[3] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 3643768340310130687 + C := madd0(m, 3643768340310130689, z[0]) + C, z[0] = madd2(m, 16926637627159085057, z[1], C) + C, z[1] = madd2(m, 9761692607219216639, z[2], C) + C, z[2] = madd2(m, 2371068001496280753, z[3], C) + z[3] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 3643768340310130687 + C := madd0(m, 3643768340310130689, z[0]) + C, z[0] = madd2(m, 16926637627159085057, z[1], C) + C, z[1] = madd2(m, 9761692607219216639, z[2], C) + 
C, z[2] = madd2(m, 2371068001496280753, z[3], C) + z[3] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } +} + +func _addGeneric(z, x, y *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], y[0], 0) + z[1], carry = bits.Add64(x[1], y[1], carry) + z[2], carry = bits.Add64(x[2], y[2], carry) + z[3], _ = bits.Add64(x[3], y[3], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } +} + +func _doubleGeneric(z, x *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], x[0], 0) + z[1], carry = bits.Add64(x[1], x[1], carry) + z[2], carry = bits.Add64(x[2], x[2], carry) + z[3], _ = bits.Add64(x[3], x[3], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 
16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } +} + +func _subGeneric(z, x, y *Element) { + var b uint64 + z[0], b = bits.Sub64(x[0], y[0], 0) + z[1], b = bits.Sub64(x[1], y[1], b) + z[2], b = bits.Sub64(x[2], y[2], b) + z[3], b = bits.Sub64(x[3], y[3], b) + if b != 0 { + var c uint64 + z[0], c = bits.Add64(z[0], 3643768340310130689, 0) + z[1], c = bits.Add64(z[1], 16926637627159085057, c) + z[2], c = bits.Add64(z[2], 9761692607219216639, c) + z[3], _ = bits.Add64(z[3], 2371068001496280753, c) + } +} + +func _negGeneric(z, x *Element) { + if x.IsZero() { + z.SetZero() + return + } + var borrow uint64 + z[0], borrow = bits.Sub64(3643768340310130689, x[0], 0) + z[1], borrow = bits.Sub64(16926637627159085057, x[1], borrow) + z[2], borrow = bits.Sub64(9761692607219216639, x[2], borrow) + z[3], _ = bits.Sub64(2371068001496280753, x[3], borrow) +} + +func _reduceGeneric(z *Element) { + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } +} + +func mulByConstant(z *Element, c uint8) { + switch c { + case 0: + z.SetZero() + return + case 1: + return + case 2: + z.Double(z) + return + case 3: + _z := *z + z.Double(z).Add(z, &_z) + case 5: + _z := *z + z.Double(z).Double(z).Add(z, &_z) + default: + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. 
+// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + +// Exp z = x^exponent mod q +func (z *Element) Exp(x Element, exponent *big.Int) *Element { + var bZero big.Int + if exponent.Cmp(&bZero) == 0 { + return z.SetOne() + } + + z.Set(&x) + + for i := exponent.BitLen() - 2; i >= 0; i-- { + z.Square(z) + if exponent.Bit(i) == 1 { + z.Mul(z, &x) + } + } + + return z +} + +// ToMont converts z to Montgomery form +// sets and returns z = z * r² +func (z *Element) ToMont() *Element { + return z.Mul(z, &rSquare) +} + +// ToRegular returns z in regular form (doesn't mutate z) +func (z Element) ToRegular() Element { + return *z.FromMont() +} + +// String returns the decimal representation of z as generated by +// z.Text(10). +func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". 
+// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[24:32], z[0]) + binary.BigEndian.PutUint64(b[16:24], z[1]) + binary.BigEndian.PutUint64(b[8:16], z[2]) + binary.BigEndian.PutUint64(b[0:8], z[3]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. +func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[24:32], _z[0]) + binary.BigEndian.PutUint64(res[16:24], _z[1]) + binary.BigEndian.PutUint64(res[8:16], _z[2]) + binary.BigEndian.PutUint64(res[0:8], _z[3]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. 
+func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. 
+// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) +func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil +} + +// Legendre returns the Legendre symbol of z (either +1, -1, or 0.) 
+func (z *Element) Legendre() int { + var l Element + // z^((q-1)/2) + l.expByLegendreExp(*z) + + if l.IsZero() { + return 0 + } + + // if l == 1 + if (l[3] == 1849268063235586341) && (l[2] == 5455128044303689984) && (l[1] == 10640745125853265911) && (l[0] == 11387109765248188409) { + return 1 + } + return -1 +} + +// Sqrt z = √x mod q +// if the square root doesn't exist (x is not a square mod q) +// Sqrt leaves z unchanged and returns nil +func (z *Element) Sqrt(x *Element) *Element { + // q ≡ 1 (mod 4) + // see modSqrtTonelliShanks in math/big/int.go + // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + + var y, b, t, w Element + // w = x^((s-1)/2)) + w.expBySqrtExp(*x) + + // y = x^((s+1)/2)) = w * x + y.Mul(x, &w) + + // b = x^s = w * w * x = y * x + b.Mul(&w, &y) + + // g = nonResidue ^ s + var g = Element{ + 4558548184074722573, + 11721321436470045759, + 14707307855974552649, + 1565820507177503731, + } + r := uint64(42) + + // compute legendre symbol + // t = x^((q-1)/2) = r-1 squaring of x^s + t = b + for i := uint64(0); i < r-1; i++ { + t.Square(&t) + } + if t.IsZero() { + return z.SetZero() + } + if !((t[3] == 1849268063235586341) && (t[2] == 5455128044303689984) && (t[1] == 10640745125853265911) && (t[0] == 11387109765248188409)) { + // t != 1, we don't have a square root + return nil + } + for { + var m uint64 + t = b + + // for t != 1 + for !((t[3] == 1849268063235586341) && (t[2] == 5455128044303689984) && (t[1] == 10640745125853265911) && (t[0] == 11387109765248188409)) { + t.Square(&t) + m++ + } + + if m == 0 { + return z.Set(&y) + } + // t = g^(2^(r-m-1)) mod q + ge := int(r - m - 1) + t = g + for ge > 0 { + t.Square(&t) + ge-- + } + + g.Square(&t) + y.Mul(&y, &t) + b.Mul(&b, &g) + r = m + } +} + +func max(a int, b int) int { + if a > b { + return a + } + return b +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +const updateFactorsConversionBias int64 = 
0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 11496758646349758257 +const inversionCorrectionFactorWord1 = 14106295395927053233 +const inversionCorrectionFactorWord2 = 9675338311035607220 +const inversionCorrectionFactorWord3 = 300574624876614870 + +const invIterationsN = 18 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x *Element) *Element { + if x.IsZero() { + z.SetZero() + return z + } + + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + } // b := q + + u := Element{1} + + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 + + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < 
approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still + } + + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) + + } else { + // Save update factors + pf0, 
pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. 
Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 + + m := x[0] * qInvNegLsw + + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + + // the high word of m * qElement[3] is at most 62 bits + // x[3] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[4] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + + t[i+Limbs] += C + } + { + const i = 3 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + z[3], z[2] = madd2(m, qElementWord3, t[i+3], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 
3643768340310130689, 0) + z[1], b = bits.Sub64(z[1], 16926637627159085057, b) + z[2], b = bits.Sub64(z[2], 9761692607219216639, b) + z[3], _ = bits.Sub64(z[3], 2371068001496280753, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[3] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], _ = bits.Add64(neg1, qElementWord3, b) + } + } +} + +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} + +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 + + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + xHi, _ = bits.Sub64(0, xHi, b) + + return xHi +} + +// regular multiplication by one word regular (non montgomery) +// Fewer additions than the branch-free for positive y. 
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { + + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) + + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) + + if y < 0 { + c = z.neg(z, c) + } + + return c +} + +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { + + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w + + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 3 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c + } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := 
yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + + return yHi +} diff --git a/ecc/bls12-378/fr/element_exp.go b/ecc/bls12-378/fr/element_exp.go new file mode 100644 index 000000000..372af0051 --- /dev/null +++ b/ecc/bls12-378/fr/element_exp.go @@ -0,0 +1,642 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// expBySqrtExp is equivalent to z.Exp(x, 41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _100 = 2*_10 + // _101 = 1 + _100 + // _1010 = 2*_101 + // _1111 = _101 + _1010 + // _10011 = _100 + _1111 + // _10100 = 1 + _10011 + // _11101 = _1010 + _10011 + // _101100 = _1111 + _11101 + // _1001001 = _11101 + _101100 + // _1001101 = _100 + _1001001 + // _1001111 = _10 + _1001101 + // _1010011 = _100 + _1001111 + // _1011100 = _1111 + _1001101 + // _10101011 = _1001111 + _1011100 + // _10111110 = _10011 + _10101011 + // _11001000 = _1010 + _10111110 + // i18 = 2*_11001000 + // i19 = _10101011 + i18 + // i20 = _1001001 + i19 + // i21 = i18 + i20 + // i22 = _1001101 + i21 + // i23 = _1010011 + i22 + // i24 = _1001001 + i23 + // i25 = i20 + i24 + // i26 = _1111 + i25 + // i27 = i19 + i26 + // i28 = i22 + i27 + // i29 = i24 + i28 + // i30 = _10111110 + i29 + // i31 = _101100 + i30 + // i32 = i25 + i31 + // i33 = i30 + i32 + // i34 = i28 + i33 + // i35 = _10100 + i34 + // i36 = i21 + i35 + // i37 = i32 + i36 + // i38 = i27 + i37 + // i39 = i31 + i38 + // i40 = i23 + i39 + // i41 = 2*i36 + // i42 = i38 + i40 + // i43 = _1011100 + i42 + // i92 = ((i41 << 16 + i42) << 14 + i33) << 17 + // i129 = ((i37 + i92) << 20 + i26 + i43) << 14 + // i168 = ((i34 + i129) << 17 + i35) << 19 + i40 + // i209 = ((i168 << 17 + i43) << 17 + i39) << 5 + // i248 = ((_101 + i209) << 30 + i29) << 6 + _101 + // return i248 << 3 + // + // Operations: 200 squares 51 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10 Element + // Step 1: t3 = x^0x2 + t3.Square(&x) + + // Step 2: t2 = x^0x4 + t2.Square(t3) + + // Step 3: z = x^0x5 + z.Mul(&x, t2) + + // Step 4: t9 = x^0xa + t9.Square(z) + + // Step 5: t6 = x^0xf + t6.Mul(z, t9) + + // Step 6: t8 = x^0x13 + t8.Mul(t2, t6) + + // Step 7: t4 = x^0x14 + t4.Mul(&x, t8) + + // Step 8: t0 = x^0x1d + t0.Mul(t9, t8) + + // Step 9: t1 = x^0x2c + t1.Mul(t6, t0) + + // Step 10: t0 = x^0x49 + t0.Mul(t0, t1) + + // Step 11: t5 = x^0x4d + t5.Mul(t2, t0) + + // Step 12: t7 = x^0x4f + t7.Mul(t3, t5) + + // Step 13: t3 = x^0x53 + t3.Mul(t2, t7) + + // Step 14: t2 = x^0x5c + t2.Mul(t6, t5) + + // Step 15: t7 = x^0xab + t7.Mul(t7, t2) + + // Step 16: t8 = x^0xbe + t8.Mul(t8, t7) + + // Step 17: t9 = x^0xc8 + t9.Mul(t9, t8) + + // Step 18: t10 = x^0x190 + t10.Square(t9) + + // Step 19: t9 = x^0x23b + t9.Mul(t7, t10) + + // Step 20: t7 = x^0x284 + t7.Mul(t0, t9) + + // Step 21: t10 = x^0x414 + t10.Mul(t10, t7) + + // Step 22: t5 = x^0x461 + t5.Mul(t5, t10) + + // Step 23: t3 = x^0x4b4 + t3.Mul(t3, t5) + + // Step 24: t0 = x^0x4fd + t0.Mul(t0, t3) + + // Step 25: t7 = x^0x781 + t7.Mul(t7, t0) + + // Step 26: t6 = x^0x790 + t6.Mul(t6, t7) + + // Step 27: t9 = x^0x9cb + t9.Mul(t9, t6) + + // Step 28: t5 = x^0xe2c + t5.Mul(t5, t9) + + // Step 29: t0 = x^0x1329 + t0.Mul(t0, t5) + + // Step 30: t8 = x^0x13e7 + t8.Mul(t8, t0) + + // Step 31: t1 = x^0x1413 + t1.Mul(t1, t8) + + // Step 32: t7 = x^0x1b94 + t7.Mul(t7, t1) + + // Step 33: t8 = x^0x2f7b + t8.Mul(t8, t7) + + // Step 34: t5 = x^0x3da7 + t5.Mul(t5, t8) + + // Step 35: t4 = x^0x3dbb + t4.Mul(t4, t5) + + // Step 36: t10 = x^0x41cf + t10.Mul(t10, t4) + + // Step 37: t7 = x^0x5d63 + t7.Mul(t7, t10) + + // Step 38: t9 = x^0x672e + 
t9.Mul(t9, t7) + + // Step 39: t1 = x^0x7b41 + t1.Mul(t1, t9) + + // Step 40: t3 = x^0x7ff5 + t3.Mul(t3, t1) + + // Step 41: t10 = x^0x839e + t10.Square(t10) + + // Step 42: t9 = x^0xe723 + t9.Mul(t9, t3) + + // Step 43: t2 = x^0xe77f + t2.Mul(t2, t9) + + // Step 59: t10 = x^0x839e0000 + for s := 0; s < 16; s++ { + t10.Square(t10) + } + + // Step 60: t9 = x^0x839ee723 + t9.Mul(t9, t10) + + // Step 74: t9 = x^0x20e7b9c8c000 + for s := 0; s < 14; s++ { + t9.Square(t9) + } + + // Step 75: t8 = x^0x20e7b9c8ef7b + t8.Mul(t8, t9) + + // Step 92: t8 = x^0x41cf7391def60000 + for s := 0; s < 17; s++ { + t8.Square(t8) + } + + // Step 93: t7 = x^0x41cf7391def65d63 + t7.Mul(t7, t8) + + // Step 113: t7 = x^0x41cf7391def65d6300000 + for s := 0; s < 20; s++ { + t7.Square(t7) + } + + // Step 114: t6 = x^0x41cf7391def65d6300790 + t6.Mul(t6, t7) + + // Step 115: t6 = x^0x41cf7391def65d630ef0f + t6.Mul(t2, t6) + + // Step 129: t6 = x^0x1073dce477bd9758c3bc3c000 + for s := 0; s < 14; s++ { + t6.Square(t6) + } + + // Step 130: t5 = x^0x1073dce477bd9758c3bc3fda7 + t5.Mul(t5, t6) + + // Step 147: t5 = x^0x20e7b9c8ef7b2eb187787fb4e0000 + for s := 0; s < 17; s++ { + t5.Square(t5) + } + + // Step 148: t4 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb + t4.Mul(t4, t5) + + // Step 167: t4 = x^0x1073dce477bd9758c3bc3fda71edd80000 + for s := 0; s < 19; s++ { + t4.Square(t4) + } + + // Step 168: t3 = x^0x1073dce477bd9758c3bc3fda71edd87ff5 + t3.Mul(t3, t4) + + // Step 185: t3 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffea0000 + for s := 0; s < 17; s++ { + t3.Square(t3) + } + + // Step 186: t2 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f + t2.Mul(t2, t3) + + // Step 203: t2 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe0000 + for s := 0; s < 17; s++ { + t2.Square(t2) + } + + // Step 204: t1 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe7b41 + t1.Mul(t1, t2) + + // Step 209: t1 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6820 + for s := 0; s < 5; s++ { + t1.Square(t1) + } + + // Step 210: t1 = 
x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6825 + t1.Mul(z, t1) + + // Step 240: t1 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940000000 + for s := 0; s < 30; s++ { + t1.Square(t1) + } + + // Step 241: t0 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940001329 + t0.Mul(t0, t1) + + // Step 247: t0 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 248: z = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca45 + z.Mul(z, t0) + + // Step 251: z = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 + for s := 0; s < 3; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _100 = 2*_10 + // _101 = 1 + _100 + // _1010 = 2*_101 + // _1111 = _101 + _1010 + // _10011 = _100 + _1111 + // _10100 = 1 + _10011 + // _11101 = _1010 + _10011 + // _101100 = _1111 + _11101 + // _1001001 = _11101 + _101100 + // _1001101 = _100 + _1001001 + // _1001111 = _10 + _1001101 + // _1010001 = _10 + _1001111 + // _1010011 = _10 + _1010001 + // _1011100 = _1111 + _1001101 + // _10101011 = _1001111 + _1011100 + // _10111110 = _10011 + _10101011 + // _11001000 = _1010 + _10111110 + // i19 = 2*_11001000 + // i20 = _10101011 + i19 + // i21 = _1001001 + i20 + // i22 = i19 + i21 + // i23 = _1001101 + i22 + // i24 = _1010011 + i23 + // i25 = _1001001 + i24 + // i26 = i21 + i25 + // i27 = _1111 + i26 + // i28 = i20 + i27 + // i29 = i23 + i28 + // i30 = i25 + i29 + // i31 = _10111110 + i30 + // i32 = _101100 + i31 + // i33 = i26 + i32 + // i34 = i31 + i33 + // i35 = i29 + i34 + // i36 = _10100 + i35 + // i37 = i22 + i36 + // i38 = i33 + i37 + // i39 = i28 + i38 + // i40 = i32 + i39 + // i41 = i24 + i40 + // i42 = 2*i37 + // i43 = i39 
+ i41 + // i44 = _1011100 + i43 + // i93 = ((i42 << 16 + i43) << 14 + i34) << 17 + // i130 = ((i38 + i93) << 20 + i27 + i44) << 14 + // i169 = ((i35 + i130) << 17 + i36) << 19 + i41 + // i210 = ((i169 << 17 + i44) << 17 + i40) << 5 + // i253 = ((_101 + i210) << 30 + i30) << 10 + _1010001 + // return i253 << 41 + // + // Operations: 242 squares 52 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11 Element + // Step 1: t3 = x^0x2 + t3.Square(&x) + + // Step 2: z = x^0x4 + z.Square(t3) + + // Step 3: t1 = x^0x5 + t1.Mul(&x, z) + + // Step 4: t10 = x^0xa + t10.Square(t1) + + // Step 5: t7 = x^0xf + t7.Mul(t1, t10) + + // Step 6: t9 = x^0x13 + t9.Mul(z, t7) + + // Step 7: t5 = x^0x14 + t5.Mul(&x, t9) + + // Step 8: t0 = x^0x1d + t0.Mul(t10, t9) + + // Step 9: t2 = x^0x2c + t2.Mul(t7, t0) + + // Step 10: t0 = x^0x49 + t0.Mul(t0, t2) + + // Step 11: t6 = x^0x4d + t6.Mul(z, t0) + + // Step 12: t8 = x^0x4f + t8.Mul(t3, t6) + + // Step 13: z = x^0x51 + z.Mul(t3, t8) + + // Step 14: t4 = x^0x53 + t4.Mul(t3, z) + + // Step 15: t3 = x^0x5c + t3.Mul(t7, t6) + + // Step 16: t8 = x^0xab + t8.Mul(t8, t3) + + // Step 17: t9 = x^0xbe + t9.Mul(t9, t8) + + // Step 18: t10 = x^0xc8 + t10.Mul(t10, t9) + + // Step 19: t11 = x^0x190 + t11.Square(t10) + + // Step 20: t10 = x^0x23b + t10.Mul(t8, t11) + + // Step 21: t8 = x^0x284 + t8.Mul(t0, t10) + + // Step 22: t11 = x^0x414 + t11.Mul(t11, t8) + + // Step 23: t6 = x^0x461 + t6.Mul(t6, t11) + + // Step 24: t4 = x^0x4b4 + t4.Mul(t4, t6) + + // Step 25: t0 = x^0x4fd + t0.Mul(t0, t4) + + // Step 26: t8 = x^0x781 + t8.Mul(t8, t0) + + // Step 27: t7 = x^0x790 + t7.Mul(t7, t8) + + // Step 28: t10 = x^0x9cb + t10.Mul(t10, t7) + + // Step 29: t6 = x^0xe2c + 
t6.Mul(t6, t10) + + // Step 30: t0 = x^0x1329 + t0.Mul(t0, t6) + + // Step 31: t9 = x^0x13e7 + t9.Mul(t9, t0) + + // Step 32: t2 = x^0x1413 + t2.Mul(t2, t9) + + // Step 33: t8 = x^0x1b94 + t8.Mul(t8, t2) + + // Step 34: t9 = x^0x2f7b + t9.Mul(t9, t8) + + // Step 35: t6 = x^0x3da7 + t6.Mul(t6, t9) + + // Step 36: t5 = x^0x3dbb + t5.Mul(t5, t6) + + // Step 37: t11 = x^0x41cf + t11.Mul(t11, t5) + + // Step 38: t8 = x^0x5d63 + t8.Mul(t8, t11) + + // Step 39: t10 = x^0x672e + t10.Mul(t10, t8) + + // Step 40: t2 = x^0x7b41 + t2.Mul(t2, t10) + + // Step 41: t4 = x^0x7ff5 + t4.Mul(t4, t2) + + // Step 42: t11 = x^0x839e + t11.Square(t11) + + // Step 43: t10 = x^0xe723 + t10.Mul(t10, t4) + + // Step 44: t3 = x^0xe77f + t3.Mul(t3, t10) + + // Step 60: t11 = x^0x839e0000 + for s := 0; s < 16; s++ { + t11.Square(t11) + } + + // Step 61: t10 = x^0x839ee723 + t10.Mul(t10, t11) + + // Step 75: t10 = x^0x20e7b9c8c000 + for s := 0; s < 14; s++ { + t10.Square(t10) + } + + // Step 76: t9 = x^0x20e7b9c8ef7b + t9.Mul(t9, t10) + + // Step 93: t9 = x^0x41cf7391def60000 + for s := 0; s < 17; s++ { + t9.Square(t9) + } + + // Step 94: t8 = x^0x41cf7391def65d63 + t8.Mul(t8, t9) + + // Step 114: t8 = x^0x41cf7391def65d6300000 + for s := 0; s < 20; s++ { + t8.Square(t8) + } + + // Step 115: t7 = x^0x41cf7391def65d6300790 + t7.Mul(t7, t8) + + // Step 116: t7 = x^0x41cf7391def65d630ef0f + t7.Mul(t3, t7) + + // Step 130: t7 = x^0x1073dce477bd9758c3bc3c000 + for s := 0; s < 14; s++ { + t7.Square(t7) + } + + // Step 131: t6 = x^0x1073dce477bd9758c3bc3fda7 + t6.Mul(t6, t7) + + // Step 148: t6 = x^0x20e7b9c8ef7b2eb187787fb4e0000 + for s := 0; s < 17; s++ { + t6.Square(t6) + } + + // Step 149: t5 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb + t5.Mul(t5, t6) + + // Step 168: t5 = x^0x1073dce477bd9758c3bc3fda71edd80000 + for s := 0; s < 19; s++ { + t5.Square(t5) + } + + // Step 169: t4 = x^0x1073dce477bd9758c3bc3fda71edd87ff5 + t4.Mul(t4, t5) + + // Step 186: t4 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffea0000 + for 
s := 0; s < 17; s++ { + t4.Square(t4) + } + + // Step 187: t3 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f + t3.Mul(t3, t4) + + // Step 204: t3 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe0000 + for s := 0; s < 17; s++ { + t3.Square(t3) + } + + // Step 205: t2 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe7b41 + t2.Mul(t2, t3) + + // Step 210: t2 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6820 + for s := 0; s < 5; s++ { + t2.Square(t2) + } + + // Step 211: t1 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6825 + t1.Mul(t1, t2) + + // Step 241: t1 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940000000 + for s := 0; s < 30; s++ { + t1.Square(t1) + } + + // Step 242: t0 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940001329 + t0.Mul(t0, t1) + + // Step 252: t0 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca400 + for s := 0; s < 10; s++ { + t0.Square(t0) + } + + // Step 253: z = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca451 + z.Mul(z, t0) + + // Step 294: z = x^0x1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 + for s := 0; s < 41; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bls12-378/fr/element_fuzz.go b/ecc/bls12-378/fr/element_fuzz.go new file mode 100644 index 000000000..a4c87eb25 --- /dev/null +++ b/ecc/bls12-378/fr/element_fuzz.go @@ -0,0 +1,136 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Code generated by consensys/gnark-crypto DO NOT EDIT

package fr

import (
	"bytes"
	"encoding/binary"
	"io"
	"math/big"
	"math/bits"
)

// go-fuzz return codes: a positive value asks the engine to prioritize the
// input, zero is neutral, and a negative value drops it from the corpus.
const (
	fuzzInteresting = 1
	fuzzNormal      = 0
	fuzzDiscard     = -1
)

// Fuzz arithmetic operations fuzzer.
// It derives two field elements from the raw fuzz input and checks three
// properties: assembly mul agrees with the generic Go mul, Inverse agrees
// with big.Int.ModInverse, and a + (-a) == 0.
func Fuzz(data []byte) int {
	r := bytes.NewReader(data)

	var e1, e2 Element
	e1.SetRawBytes(r)
	e2.SetRawBytes(r)

	{
		// mul assembly
		// differential check: the platform-specific mul must match _mulGeneric
		// on identical operands.

		var c, _c Element
		a, _a, b, _b := e1, e1, e2, e2
		c.Mul(&a, &b)
		_mulGeneric(&_c, &_a, &_b)

		if !c.Equal(&_c) {
			panic("mul asm != mul generic on Element")
		}
	}

	{
		// inverse
		// cross-check field inversion against big.Int modular inverse over
		// the same modulus (both compared in regular, non-Montgomery form).
		inv := e1
		inv.Inverse(&inv)

		var bInv, b1, b2 big.Int
		e1.ToBigIntRegular(&b1)
		bInv.ModInverse(&b1, Modulus())
		inv.ToBigIntRegular(&b2)

		if b2.Cmp(&bInv) != 0 {
			panic("inverse operation doesn't match big int result")
		}
	}

	{
		// a + -a == 0
		a, b := e1, e1
		b.Neg(&b)
		a.Add(&a, &b)
		if !a.IsZero() {
			panic("a + -a != 0")
		}
	}

	return fuzzNormal

}

// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader
// and interpret it as big endian uint64
// used for fuzzing purposes only
func (z *Element) SetRawBytes(r io.Reader) {

	buf := make([]byte, 8)

	// fill each 64-bit limb from the reader; on short input keep the limbs
	// read so far (remaining limbs stay at their previous value).
	for i := 0; i < len(z); i++ {
		if _, err := io.ReadFull(r, buf); err != nil {
			goto eof
		}
		z[i] = binary.BigEndian.Uint64(buf[:])
	}
eof:
	// force the most-significant limb strictly below q's top limb; this makes
	// z < q (sampling is not uniform mod q, which is acceptable for fuzzing).
	z[3] %= qElement[3]

	// belt-and-braces conditional subtraction of q; after the reduction above
	// z is already < q, so this branch is defensive and should not trigger.
	if z.BiggerModulus() {
		var b uint64
		z[0], b = bits.Sub64(z[0], qElement[0], 0)
		z[1], b = bits.Sub64(z[1], qElement[1], b)
		z[2], b = bits.Sub64(z[2], qElement[2], b)
		z[3], b = bits.Sub64(z[3], qElement[3], b)
	}

	return
}

// BiggerModulus reports whether z >= q, comparing limbs from the most
// significant (z[3]) down to the least significant (z[0]).
func (z *Element) BiggerModulus() bool {
	if z[3] > qElement[3] {
		return true
	}
	if z[3] < qElement[3] {
		return false
	}

	if z[2] > qElement[2] {
		return true
	}
	if z[2] < qElement[2] {
		return false
	}

	if z[1] > qElement[1] {
		return true
	}
	if z[1] < qElement[1] {
		return false
	}

	return z[0] >= qElement[0]
}
diff --git
a/ecc/bls12-378/fr/element_mul_adx_amd64.s b/ecc/bls12-378/fr/element_mul_adx_amd64.s new file mode 100644 index 000000000..35a9c7b30 --- /dev/null +++ b/ecc/bls12-378/fr/element_mul_adx_amd64.s @@ -0,0 +1,466 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x3291440000000001 +DATA q<>+8(SB)/8, $0xeae77f3da0940001 +DATA q<>+16(SB)/8, $0x87787fb4e3dbb0ff +DATA q<>+24(SB)/8, $0x20e7b9c8ef7b2eb1 +GLOBL q<>(SB), (RODATA+NOPTR), $32 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x329143ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), NOSPLIT, $0-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + MOVQ 
x+8(FP), SI + + // x[0] -> DI + // x[1] -> R8 + // x[2] -> R9 + // x[3] -> R10 + MOVQ 0(SI), DI + MOVQ 8(SI), R8 + MOVQ 16(SI), R9 + MOVQ 24(SI), R10 + MOVQ y+16(FP), R11 + + // A -> BP + // t[0] -> R14 + // t[1] -> R13 + // t[2] -> CX + // t[3] -> BX + // clear the flags + XORQ AX, AX + MOVQ 0(R11), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ DI, R14, R13 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R8, AX, CX + ADOXQ AX, R13 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R9, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 8(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, 
R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 16(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 24(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + 
ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET + +TEXT ·fromMont(SB), NOSPLIT, $0-8 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R13 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), 
DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET diff --git a/ecc/bls12-378/fr/element_mul_amd64.s b/ecc/bls12-378/fr/element_mul_amd64.s new file mode 100644 index 000000000..850f72813 --- /dev/null +++ b/ecc/bls12-378/fr/element_mul_amd64.s @@ -0,0 +1,488 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x3291440000000001 +DATA q<>+8(SB)/8, $0xeae77f3da0940001 +DATA q<>+16(SB)/8, $0x87787fb4e3dbb0ff +DATA q<>+24(SB)/8, $0x20e7b9c8ef7b2eb1 +GLOBL q<>(SB), (RODATA+NOPTR), $32 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x329143ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $24-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), SI + + // x[0] -> DI + // x[1] -> R8 + // x[2] -> R9 + // x[3] -> R10 + MOVQ 0(SI), DI + MOVQ 8(SI), R8 + MOVQ 16(SI), R9 + MOVQ 24(SI), R10 + MOVQ y+16(FP), R11 + + // A -> BP + // t[0] -> R14 + // t[1] -> R13 + // t[2] -> CX + // t[3] -> BX + // clear the flags + XORQ AX, AX + MOVQ 0(R11), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ DI, R14, R13 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R8, AX, CX + ADOXQ AX, R13 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R9, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, 
DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 8(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 16(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // 
m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 24(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $8-8 + NO_LOCAL_POINTERS + + // the algorithm is 
described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R13 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ 
qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bls12-378/fr/element_ops_amd64.go b/ecc/bls12-378/fr/element_ops_amd64.go new file mode 100644 index 000000000..78022b3e6 --- /dev/null +++ b/ecc/bls12-378/fr/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Code generated by consensys/gnark-crypto DO NOT EDIT

package fr

// MulBy3 x *= 3 (mod q), in place; implemented in element_ops_amd64.s.
//go:noescape
func MulBy3(x *Element)

// MulBy5 x *= 5 (mod q), in place; implemented in element_ops_amd64.s.
//go:noescape
func MulBy5(x *Element)

// MulBy13 x *= 13 (mod q), in place; implemented in element_ops_amd64.s.
//go:noescape
func MulBy13(x *Element)

// add res = x + y (mod q)
//go:noescape
func add(res, x, y *Element)

// sub res = x - y (mod q)
//go:noescape
func sub(res, x, y *Element)

// neg res = -x (mod q)
//go:noescape
func neg(res, x *Element)

// double res = 2*x (mod q)
//go:noescape
func double(res, x *Element)

// mul res = x * y (mod q); operands and result are in Montgomery form
// (CIOS Montgomery multiplication, with an ADX/BMI2 fast path).
//go:noescape
func mul(res, x, y *Element)

// fromMont converts res out of Montgomery form, in place (res = res * R⁻¹ mod q).
//go:noescape
func fromMont(res *Element)

// reduce ensures res < q, in place (single conditional subtraction of q).
//go:noescape
func reduce(res *Element)

// Butterfly sets a = a + b and b = a - b (mod q), in place.
//go:noescape
func Butterfly(a, b *Element)
diff --git a/ecc/bls12-378/fr/element_ops_amd64.s b/ecc/bls12-378/fr/element_ops_amd64.s
new file mode 100644
index 000000000..a8d8f4ca4
--- /dev/null
+++ b/ecc/bls12-378/fr/element_ops_amd64.s
@@ -0,0 +1,340 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x3291440000000001 +DATA q<>+8(SB)/8, $0xeae77f3da0940001 +DATA q<>+16(SB)/8, $0x87787fb4e3dbb0ff +DATA q<>+24(SB)/8, $0x20e7b9c8ef7b2eb1 +GLOBL q<>(SB), (RODATA+NOPTR), $32 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x329143ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + +// add(res, x, y *Element) +TEXT ·add(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + + // reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) + + MOVQ res+0(FP), R12 + MOVQ CX, 0(R12) + MOVQ BX, 8(R12) + MOVQ SI, 16(R12) + MOVQ DI, 24(R12) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + XORQ DI, DI + MOVQ x+8(FP), SI + MOVQ 0(SI), AX + MOVQ 8(SI), DX + MOVQ 16(SI), CX + MOVQ 24(SI), BX + MOVQ y+16(FP), SI + SUBQ 0(SI), AX + SBBQ 8(SI), DX + SBBQ 16(SI), CX + SBBQ 24(SI), BX + MOVQ $0x3291440000000001, R8 + MOVQ $0xeae77f3da0940001, R9 + MOVQ $0x87787fb4e3dbb0ff, R10 + MOVQ $0x20e7b9c8ef7b2eb1, R11 + CMOVQCC DI, R8 + CMOVQCC DI, R9 + CMOVQCC DI, R10 + CMOVQCC DI, R11 + ADDQ R8, AX + ADCQ R9, DX + ADCQ R10, CX + ADCQ R11, BX + MOVQ res+0(FP), R12 + MOVQ AX, 0(R12) + MOVQ DX, 8(R12) + MOVQ CX, 16(R12) + MOVQ BX, 24(R12) + RET + +// double(res, x *Element) +TEXT ·double(SB), NOSPLIT, $0-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp 
registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ res+0(FP), R11 + MOVQ DX, 0(R11) + MOVQ CX, 8(R11) + MOVQ BX, 16(R11) + MOVQ SI, 24(R11) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), DI + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + TESTQ AX, AX + JEQ l1 + MOVQ $0x3291440000000001, R8 + SUBQ DX, R8 + MOVQ R8, 0(DI) + MOVQ $0xeae77f3da0940001, R8 + SBBQ CX, R8 + MOVQ R8, 8(DI) + MOVQ $0x87787fb4e3dbb0ff, R8 + SBBQ BX, R8 + MOVQ R8, 16(DI) + MOVQ $0x20e7b9c8ef7b2eb1, R8 + SBBQ SI, R8 + MOVQ R8, 24(DI) + RET + +l1: + MOVQ AX, 0(DI) + MOVQ AX, 8(DI) + MOVQ AX, 16(DI) + MOVQ AX, 24(DI) + RET + +TEXT ·reduce(SB), NOSPLIT, $0-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + 
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (R15,DI,R8,R9) + REDUCE(DX,CX,BX,SI,R15,DI,R8,R9) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + MOVQ DX, R11 + MOVQ CX, R12 + MOVQ BX, R13 + MOVQ SI, R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), NOSPLIT, $0-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ CX, R8 + MOVQ BX, R9 + MOVQ SI, R10 + MOVQ DI, R11 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + SUBQ 0(DX), R8 + SBBQ 8(DX), R9 + SBBQ 16(DX), R10 + SBBQ 24(DX), R11 + MOVQ $0x3291440000000001, R12 + MOVQ $0xeae77f3da0940001, R13 + MOVQ 
$0x87787fb4e3dbb0ff, R14 + MOVQ $0x20e7b9c8ef7b2eb1, R15 + CMOVQCC AX, R12 + CMOVQCC AX, R13 + CMOVQCC AX, R14 + CMOVQCC AX, R15 + ADDQ R12, R8 + ADCQ R13, R9 + ADCQ R14, R10 + ADCQ R15, R11 + MOVQ R8, 0(DX) + MOVQ R9, 8(DX) + MOVQ R10, 16(DX) + MOVQ R11, 24(DX) + + // reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + RET diff --git a/ecc/bls12-378/fr/element_ops_noasm.go b/ecc/bls12-378/fr/element_ops_noasm.go new file mode 100644 index 000000000..ec1fac18d --- /dev/null +++ b/ecc/bls12-378/fr/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. 
In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bls12-378/fr/element_test.go b/ecc/bls12-378/fr/element_test.go new file mode 100644 index 000000000..34311fe7b --- /dev/null +++ b/ecc/bls12-378/fr/element_test.go @@ -0,0 +1,2649 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 1260465344847950704, + 15627634503313390135, + 1085346480195626314, + 405261321576397495, + } + benchResElement.SetOne() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 1260465344847950704, + 15627634503313390135, + 
1085346480195626314, + 405261321576397495, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 200 + nbFuzz = 1000 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[3]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[3]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + +func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + 
a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else 
{ + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + 
MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func 
TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed 
elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, 
test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000", 16) + const sqrtExponentElement = "41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match Exp(1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + 
properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + 
c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return 
c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, 
decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[3] != ^uint64(0) { + g.element[3] %= (qElement[3] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[3] != ^uint64(0) { + g.element[3] %= (qElement[3] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[3] != ^uint64(0) { + g[3] %= (qElement[3] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if 
qElement[3] != ^uint64(0) { + g[3] %= (qElement[3] + 1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], _ = bits.Add64(a[3], qElement[3], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. 
+func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } +} + +//TODO: Tests like this (update factor related) are common to 
all fields. Move them to somewhere non-autogen +func TestUpdateFactorSubtraction(t *testing.T) { + for i := 0; i < 1000; i++ { + + f0, g0 := randomizeUpdateFactors() + f1, g1 := randomizeUpdateFactors() + + for f0-f1 > 1<<31 || f0-f1 <= -1<<31 { + f1 /= 2 + } + + for g0-g1 > 1<<31 || g0-g1 <= -1<<31 { + g1 /= 2 + } + + c0 := updateFactorsCompose(f0, g0) + c1 := updateFactorsCompose(f1, g1) + + cRes := c0 - c1 + fRes, gRes := updateFactorsDecompose(cRes) + + if fRes != f0-f1 || gRes != g0-g1 { + t.Error(i) + } + } +} + +func TestUpdateFactorsDouble(t *testing.T) { + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f > 1<<30 || f < (-1<<31+1)/2 { + f /= 2 + if g <= 1<<29 && g >= (-1<<31+1)/4 { + g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g + } + } + + if g > 1<<30 || g < (-1<<31+1)/2 { + g /= 2 + + if f <= 1<<29 && f >= (-1<<31+1)/4 { + f *= 2 //f was kept small on g's account. Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + 
t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func TestUpdateFactorsRandomization(t *testing.T) { + var maxLen int + + //t.Log("|f| + |g| is not to exceed", 1 << 31) + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + lf, lg := abs64T32(f), abs64T32(g) + absSum := lf + lg + if absSum >= 1<<31 { + + if absSum == 1<<31 { + maxLen++ + } else { + t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum) + } + } + } + + if maxLen == 0 { + t.Error("max len not observed") + } else { + t.Log(maxLen, "maxLens observed") + } +} + +func randomizeUpdateFactor(absLimit uint32) int64 { + const maxSizeLikelihood = 10 + maxSize := mrand.Intn(maxSizeLikelihood) + + absLimit64 := int64(absLimit) + var f int64 + switch maxSize { + case 0: + f = absLimit64 + case 1: + f = -absLimit64 + default: + f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64 + } + + if f > 1<<31 { + return 1 << 31 + } else if f < -1<<31+1 { + return -1<<31 + 1 + } + + return f +} + +func abs64T32(f int64) uint32 { + if f >= 1<<32 || f < -1<<32 { + panic("f out of range") + } + 
+ if f < 0 { + return uint32(-f) + } + return uint32(f) +} + +func randomizeUpdateFactors() (int64, int64) { + var f [2]int64 + b := mrand.Int() % 2 + + f[b] = randomizeUpdateFactor(1 << 31) + + //As per the paper, |f| + |g| \le 2³¹. + f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b])) + + //Patching another edge case + if f[0]+f[1] == -1<<31 { + b = mrand.Int() % 2 + f[b]++ + } + + return f[0], f[1] +} + +func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) { + + var p1 big.Int + x.ToBigInt(&p1) + p1.Mul(&p1, big.NewInt(xC)) + + var p2 big.Int + y.ToBigInt(&p2) + p2.Mul(&p2, big.NewInt(yC)) + + p1.Add(&p1, &p2) + p1.Mod(&p1, Modulus()) + montReduce(&p1, &p1) + + var z Element + z.linearCombSosSigned(x, xC, y, yC) + z.assertMatchVeryBigInt(t, 0, &p1) +} + +func testBigNumWMul(t *testing.T, a *Element, c int64) { + var aHi uint64 + var aTimes Element + aHi = aTimes.mulWRegular(a, c) + + assertMulProduct(t, a, c, &aTimes, aHi) +} + +func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) { + var res Element + var xInt big.Int + var resInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + res.montReduceSigned(x, xHi) + montReduce(&resInt, &xInt) + res.assertMatchVeryBigInt(t, 0, &resInt) +} + +func updateFactorsCompose(f int64, g int64) int64 { + return f + g<<32 +} + +var rInv big.Int + +func montReduce(res *big.Int, x *big.Int) { + if rInv.BitLen() == 0 { // initialization + rInv.SetUint64(1) + rInv.Lsh(&rInv, Limbs*64) + rInv.ModInverse(&rInv, Modulus()) + } + res.Mul(x, &rInv) + res.Mod(res, Modulus()) +} + +func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) { + z.ToBigInt(i) + var upperWord big.Int + upperWord.SetUint64(xHi) + upperWord.Lsh(&upperWord, Limbs*64) + i.Add(&upperWord, i) +} + +func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) { + z.toVeryBigIntUnsigned(i, xHi) + if signBitSelector&xHi != 0 { + twosCompModulus := big.NewInt(1) + twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64) + i.Sub(i, 
twosCompModulus) + } +} + +func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int { + var xInt big.Int + x.ToBigInt(&xInt) + + xInt.Mul(&xInt, big.NewInt(c)) + + result.assertMatchVeryBigInt(t, resultHi, &xInt) + return xInt +} + +func assertMatch(t *testing.T, w []big.Word, a uint64, index int) { + + var wI big.Word + + if index < len(w) { + wI = w[index] + } + + const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize) + + a = a >> ((index * bits.UintSize) % 64) + a &= filter + + if uint64(wI) != a { + t.Error("Bignum mismatch: disagreement on word", index) + } +} + +func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) { + + var modulus big.Int + var aIntMod big.Int + modulus.SetInt64(1) + modulus.Lsh(&modulus, (Limbs+1)*64) + aIntMod.Mod(aInt, &modulus) + + words := aIntMod.Bits() + + const steps = 64 / bits.UintSize + for i := 0; i < Limbs*steps; i++ { + assertMatch(t, words, z[i/steps], i) + } + + for i := 0; i < steps; i++ { + assertMatch(t, words, aHi, Limbs*steps+i) + } +} + +func approximateRef(x *Element) uint64 { + + var asInt big.Int + x.ToBigInt(&asInt) + n := x.BitLen() + + if n <= 64 { + return asInt.Uint64() + } + + modulus := big.NewInt(1 << 31) + var lo big.Int + lo.Mod(&asInt, modulus) + + modulus.Lsh(modulus, uint(n-64)) + var hi big.Int + hi.Div(&asInt, modulus) + hi.Lsh(&hi, 31) + + hi.Add(&hi, &lo) + return hi.Uint64() +} diff --git a/ecc/bls12-378/fr/fft/doc.go b/ecc/bls12-378/fr/fft/doc.go new file mode 100644 index 000000000..3c35170e8 --- /dev/null +++ b/ecc/bls12-378/fr/fft/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fft provides in-place discrete Fourier transform. +package fft diff --git a/ecc/bls12-378/fr/fft/domain.go b/ecc/bls12-378/fr/fft/domain.go new file mode 100644 index 000000000..97ec9125e --- /dev/null +++ b/ecc/bls12-378/fr/fft/domain.go @@ -0,0 +1,293 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "fmt" + "io" + "math/big" + "math/bits" + "runtime" + "sync" + + "github.com/consensys/gnark-crypto/ecc" +) + +// Domain with a power of 2 cardinality +// compute a field element of order 2x and store it in FinerGenerator +// all other values can be derived from x, GeneratorSqrt +type Domain struct { + Cardinality uint64 + Depth uint64 + PrecomputeReversedTable uint64 // uint64 so it is recognized by the decoder from gnark-crypto + CardinalityInv fr.Element + Generator fr.Element + GeneratorInv fr.Element + FinerGenerator fr.Element + FinerGeneratorInv fr.Element + + // the following slices are not serialized and are (re)computed through domain.preComputeTwiddles() + + // Twiddles factor for the FFT using Generator for each stage of the recursive FFT + Twiddles [][]fr.Element + + // Twiddles factor for the FFT using GeneratorInv for each stage of the recursive FFT + TwiddlesInv [][]fr.Element + + // we precompute these mostly to avoid the memory intensive bit reverse permutation in the groth16.Prover + + // CosetTable[i][j] = domain.Generator(i-th)Sqrt ^ j + // CosetTable = fft.BitReverse(CosetTable) + CosetTable [][]fr.Element + CosetTableReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain + + // CosetTable[i][j] = domain.Generator(i-th)SqrtInv ^ j + // CosetTableInv = fft.BitReverse(CosetTableInv) + CosetTableInv [][]fr.Element + CosetTableInvReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain +} + +// NewDomain returns a subgroup with a power of 2 cardinality +// cardinality >= m +// If depth>0, the Domain will also store a primitive (2**depth)*m root +// of 1, with associated precomputed data. This allows to perform shifted +// FFT/FFTInv. +// If precomputeReversedCosetTable is set, the bit reversed cosetTable/cosetTableInv are precomputed. 
+// +// example: +// -------- +// +// * NewDomain(m, 0, false) outputs a new domain to perform the fft on Z/mZ. +// * NewDomain(m, 2, false) outputs a new domain to perform fft on Z/mZ, plus a primitive +// 2**2*m=4m-th root of 1 and associated data to compute fft/fftinv on the cosets of +// (Z/4mZ)/(Z/mZ). +func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { + + // generator of the largest 2-adic subgroup + var rootOfUnity fr.Element + + domain := &Domain{} + x := ecc.NextPowerOfTwo(m) + domain.Cardinality = uint64(x) + domain.Depth = depth + if precomputeReversedTable { + domain.PrecomputeReversedTable = 1 + } + + // find generator for Z/2^(log(m))Z and Z/2^(log(m)+cosets)Z + logx := uint64(bits.TrailingZeros64(x)) + if logx > maxOrderRoot { + panic(fmt.Sprintf("m (%d) is too big: the required root of unity does not exist", m)) + } + logGen := logx + depth + if logGen > maxOrderRoot { + panic("log(m) + cosets is too big: the required root of unity does not exist") + } + + expo := uint64(1 << (maxOrderRoot - logGen)) + bExpo := new(big.Int).SetUint64(expo) + domain.FinerGenerator.Exp(rootOfUnity, bExpo) + domain.FinerGeneratorInv.Inverse(&domain.FinerGenerator) + + // Generator = FinerGenerator^2 has order x + expo = uint64(1 << (maxOrderRoot - logx)) + bExpo.SetUint64(expo) + domain.Generator.Exp(rootOfUnity, bExpo) // order x + domain.GeneratorInv.Inverse(&domain.Generator) + domain.CardinalityInv.SetUint64(uint64(x)).Inverse(&domain.CardinalityInv) + + // twiddle factors + domain.preComputeTwiddles() + + // store the bit reversed coset tables if needed + if depth > 0 && precomputeReversedTable { + domain.reverseCosetTables() + } + + return domain +} + +func (d *Domain) reverseCosetTables() { + nbCosets := (1 << d.Depth) - 1 + d.CosetTableReversed = make([][]fr.Element, nbCosets) + d.CosetTableInvReversed = make([][]fr.Element, nbCosets) + for i := 0; i < nbCosets; i++ { + d.CosetTableReversed[i] = make([]fr.Element, d.Cardinality) + 
d.CosetTableInvReversed[i] = make([]fr.Element, d.Cardinality) + copy(d.CosetTableReversed[i], d.CosetTable[i]) + copy(d.CosetTableInvReversed[i], d.CosetTableInv[i]) + BitReverse(d.CosetTableReversed[i]) + BitReverse(d.CosetTableInvReversed[i]) + } +} + +func (d *Domain) preComputeTwiddles() { + + // nb fft stages + nbStages := uint64(bits.TrailingZeros64(d.Cardinality)) + nbCosets := (1 << d.Depth) - 1 + + d.Twiddles = make([][]fr.Element, nbStages) + d.TwiddlesInv = make([][]fr.Element, nbStages) + d.CosetTable = make([][]fr.Element, nbCosets) + d.CosetTableInv = make([][]fr.Element, nbCosets) + for i := 0; i < nbCosets; i++ { + d.CosetTable[i] = make([]fr.Element, d.Cardinality) + d.CosetTableInv[i] = make([]fr.Element, d.Cardinality) + } + + var wg sync.WaitGroup + + // for each fft stage, we pre compute the twiddle factors + twiddles := func(t [][]fr.Element, omega fr.Element) { + for i := uint64(0); i < nbStages; i++ { + t[i] = make([]fr.Element, 1+(1<<(nbStages-i-1))) + var w fr.Element + if i == 0 { + w = omega + } else { + w = t[i-1][2] + } + t[i][0] = fr.One() + t[i][1] = w + for j := 2; j < len(t[i]); j++ { + t[i][j].Mul(&t[i][j-1], &w) + } + } + wg.Done() + } + + expTable := func(sqrt fr.Element, t []fr.Element) { + t[0] = fr.One() + precomputeExpTable(sqrt, t) + wg.Done() + } + + if nbCosets > 0 { + cosetGens := make([]fr.Element, nbCosets) + cosetGensInv := make([]fr.Element, nbCosets) + cosetGens[0].Set(&d.FinerGenerator) + cosetGensInv[0].Set(&d.FinerGeneratorInv) + for i := 1; i < nbCosets; i++ { + cosetGens[i].Mul(&cosetGens[i-1], &d.FinerGenerator) + cosetGensInv[i].Mul(&cosetGensInv[i-1], &d.FinerGeneratorInv) + } + wg.Add(2 + 2*nbCosets) + go twiddles(d.Twiddles, d.Generator) + go twiddles(d.TwiddlesInv, d.GeneratorInv) + for i := 0; i < nbCosets-1; i++ { + go expTable(cosetGens[i], d.CosetTable[i]) + go expTable(cosetGensInv[i], d.CosetTableInv[i]) + } + go expTable(cosetGens[nbCosets-1], d.CosetTable[nbCosets-1]) + 
expTable(cosetGensInv[nbCosets-1], d.CosetTableInv[nbCosets-1]) + + wg.Wait() + + } else { + wg.Add(2) + go twiddles(d.Twiddles, d.Generator) + twiddles(d.TwiddlesInv, d.GeneratorInv) + wg.Wait() + } + +} + +func precomputeExpTable(w fr.Element, table []fr.Element) { + n := len(table) + + // see if it makes sense to parallelize exp tables pre-computation + interval := 0 + if runtime.NumCPU() >= 4 { + interval = (n - 1) / (runtime.NumCPU() / 4) + } + + // this ratio roughly correspond to the number of multiplication one can do in place of a Exp operation + const ratioExpMul = 6000 / 17 + + if interval < ratioExpMul { + precomputeExpTableChunk(w, 1, table[1:]) + return + } + + // we parallelize + var wg sync.WaitGroup + for i := 1; i < n; i += interval { + start := i + end := i + interval + if end > n { + end = n + } + wg.Add(1) + go func() { + precomputeExpTableChunk(w, uint64(start), table[start:end]) + wg.Done() + }() + } + wg.Wait() +} + +func precomputeExpTableChunk(w fr.Element, power uint64, table []fr.Element) { + + // this condition ensures that creating a domain of size 1 with cosets don't fail + if len(table) > 0 { + table[0].Exp(w, new(big.Int).SetUint64(power)) + for i := 1; i < len(table); i++ { + table[i].Mul(&table[i-1], &w) + } + } +} + +// WriteTo writes a binary representation of the domain (without the precomputed twiddle factors) +// to the provided writer +func (d *Domain) WriteTo(w io.Writer) (int64, error) { + + enc := curve.NewEncoder(w) + + toEncode := []interface{}{d.Cardinality, d.Depth, d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv} + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom attempts to decode a domain from Reader +func (d *Domain) ReadFrom(r io.Reader) (int64, error) { + + dec := curve.NewDecoder(r) + + toDecode := []interface{}{&d.Cardinality, 
&d.Depth, &d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv} + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + d.preComputeTwiddles() + + // store the bit reversed coset tables if needed + if d.Depth > 0 && d.PrecomputeReversedTable == 1 { + d.reverseCosetTables() + } + + return dec.BytesRead(), nil +} diff --git a/ecc/bls12-378/fr/fft/domain_test.go b/ecc/bls12-378/fr/fft/domain_test.go new file mode 100644 index 000000000..df72f0e3a --- /dev/null +++ b/ecc/bls12-378/fr/fft/domain_test.go @@ -0,0 +1,47 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "bytes" + "reflect" + "testing" +) + +func TestDomainSerialization(t *testing.T) { + + domain := NewDomain(1<<6, 1, true) + var reconstructed Domain + + var buf bytes.Buffer + written, err := domain.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + var read int64 + read, err = reconstructed.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + if written != read { + t.Fatal("didn't read as many bytes as we wrote") + } + if !reflect.DeepEqual(domain, &reconstructed) { + t.Fatal("Domain.SetBytes(Bytes()) failed") + } +} diff --git a/ecc/bls12-378/fr/fft/fft.go b/ecc/bls12-378/fr/fft/fft.go new file mode 100644 index 000000000..66f299d78 --- /dev/null +++ b/ecc/bls12-378/fr/fft/fft.go @@ -0,0 +1,318 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/bits" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// Decimation is used in the FFT call to select decimation in time or in frequency +type Decimation uint8 + +const ( + DIT Decimation = iota + DIF +) + +// parallelize threshold for a single butterfly op, if the fft stage is not parallelized already +const butterflyThreshold = 16 + +// FFT computes (recursively) the discrete Fourier transform of a and stores the result in a +// if decimation == DIT (decimation in time), the input must be in bit-reversed order +// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order +// coset sets the shift of the fft (0 = no shift, standard fft) +// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. +// +// example: +// ------- +// domain := NewDomain(m, 2) --> contains precomputed data for Z/mZ, and Z/4mZ +// FFT(pol, DIT, 1) --> evaluates pol on the coset 1 in (Z/4mZ)/(Z/mZ) +func (domain *Domain) FFT(a []fr.Element, decimation Decimation, coset uint64) { + + numCPU := uint64(runtime.NumCPU()) + + // if coset != 0, scale by coset table + if coset != 0 { + scale := func(cosetTable []fr.Element) { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &cosetTable[i]) + } + }) + } + if decimation == DIT { + if domain.PrecomputeReversedTable == 0 { + // no precomputed coset, we adjust the index of the coset table + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + irev := bits.Reverse64(uint64(i)) >> nn + a[i].Mul(&a[i], &domain.CosetTable[coset-1][int(irev)]) + } + }) + } else { + scale(domain.CosetTableReversed[coset-1]) + } + } else { + scale(domain.CosetTable[coset-1]) + } + } + + // find the stage where we 
should stop spawning go routines in our recursive calls + // (ie when we have as many go routines running as we have available CPUs) + maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU)) + if numCPU <= 1 { + maxSplits = -1 + } + + switch decimation { + case DIF: + difFFT(a, domain.Twiddles, 0, maxSplits, nil) + case DIT: + ditFFT(a, domain.Twiddles, 0, maxSplits, nil) + default: + panic("not implemented") + } +} + +// FFTInverse computes (recursively) the inverse discrete Fourier transform of a and stores the result in a +// if decimation == DIT (decimation in time), the input must be in bit-reversed order +// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order +// coset sets the shift of the fft (0 = no shift, standard fft) +// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. +func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, coset uint64) { + + numCPU := uint64(runtime.NumCPU()) + + // find the stage where we should stop spawning go routines in our recursive calls + // (ie when we have as many go routines running as we have available CPUs) + maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU)) + if numCPU <= 1 { + maxSplits = -1 + } + switch decimation { + case DIF: + difFFT(a, domain.TwiddlesInv, 0, maxSplits, nil) + case DIT: + ditFFT(a, domain.TwiddlesInv, 0, maxSplits, nil) + default: + panic("not implemented") + } + + // scale by CardinalityInv (+ cosetTableInv is coset!=0) + if coset == 0 { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &domain.CardinalityInv) + } + }) + return + } + + scale := func(cosetTable []fr.Element) { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &cosetTable[i]). 
+ Mul(&a[i], &domain.CardinalityInv) + } + }) + } + if decimation == DIT { + scale(domain.CosetTableInv[coset-1]) + return + } + + // decimation == DIF + if domain.PrecomputeReversedTable != 0 { + scale(domain.CosetTableInvReversed[coset-1]) + return + } + + // no precomputed coset, we adjust the index of the coset table + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + irev := bits.Reverse64(uint64(i)) >> nn + a[i].Mul(&a[i], &domain.CosetTableInv[coset-1][int(irev)]). + Mul(&a[i], &domain.CardinalityInv) + } + }) + +} + +func difFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) { + if chDone != nil { + defer close(chDone) + } + + n := len(a) + if n == 1 { + return + } else if n == 8 { + kerDIF8(a, twiddles, stage) + return + } + m := n >> 1 + + // if stage < maxSplits, we parallelize this butterfly + // but we have only numCPU / stage cpus available + if (m > butterflyThreshold) && (stage < maxSplits) { + // 1 << stage == estimated used CPUs + numCPU := runtime.NumCPU() / (1 << (stage)) + parallel.Execute(m, func(start, end int) { + for i := start; i < end; i++ { + fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[stage][i]) + } + }, numCPU) + } else { + // i == 0 + fr.Butterfly(&a[0], &a[m]) + for i := 1; i < m; i++ { + fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[stage][i]) + } + } + + if m == 1 { + return + } + + nextStage := stage + 1 + if stage < maxSplits { + chDone := make(chan struct{}, 1) + go difFFT(a[m:n], twiddles, nextStage, maxSplits, chDone) + difFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + <-chDone + } else { + difFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + difFFT(a[m:n], twiddles, nextStage, maxSplits, nil) + } + +} + +func ditFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) { + if chDone != nil { + defer close(chDone) + } + n := 
len(a) + if n == 1 { + return + } else if n == 8 { + kerDIT8(a, twiddles, stage) + return + } + m := n >> 1 + + nextStage := stage + 1 + + if stage < maxSplits { + // that's the only time we fire go routines + chDone := make(chan struct{}, 1) + go ditFFT(a[m:], twiddles, nextStage, maxSplits, chDone) + ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + <-chDone + } else { + ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + ditFFT(a[m:n], twiddles, nextStage, maxSplits, nil) + + } + + // if stage < maxSplits, we parallelize this butterfly + // but we have only numCPU / stage cpus available + if (m > butterflyThreshold) && (stage < maxSplits) { + // 1 << stage == estimated used CPUs + numCPU := runtime.NumCPU() / (1 << (stage)) + parallel.Execute(m, func(start, end int) { + for k := start; k < end; k++ { + a[k+m].Mul(&a[k+m], &twiddles[stage][k]) + fr.Butterfly(&a[k], &a[k+m]) + } + }, numCPU) + + } else { + fr.Butterfly(&a[0], &a[m]) + for k := 1; k < m; k++ { + a[k+m].Mul(&a[k+m], &twiddles[stage][k]) + fr.Butterfly(&a[k], &a[k+m]) + } + } +} + +// BitReverse applies the bit-reversal permutation to a. 
+// len(a) must be a power of 2 (as in every single function in this file) +func BitReverse(a []fr.Element) { + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + + for i := uint64(0); i < n; i++ { + irev := bits.Reverse64(i) >> nn + if irev > i { + a[i], a[irev] = a[irev], a[i] + } + } +} + +// kerDIT8 is a kernel that process a FFT of size 8 +func kerDIT8(a []fr.Element, twiddles [][]fr.Element, stage int) { + + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[0], &a[2]) + a[3].Mul(&a[3], &twiddles[stage+1][1]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + a[7].Mul(&a[7], &twiddles[stage+1][1]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[0], &a[4]) + a[5].Mul(&a[5], &twiddles[stage+0][1]) + fr.Butterfly(&a[1], &a[5]) + a[6].Mul(&a[6], &twiddles[stage+0][2]) + fr.Butterfly(&a[2], &a[6]) + a[7].Mul(&a[7], &twiddles[stage+0][3]) + fr.Butterfly(&a[3], &a[7]) +} + +// kerDIF8 is a kernel that process a FFT of size 8 +func kerDIF8(a []fr.Element, twiddles [][]fr.Element, stage int) { + + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + a[5].Mul(&a[5], &twiddles[stage+0][1]) + a[6].Mul(&a[6], &twiddles[stage+0][2]) + a[7].Mul(&a[7], &twiddles[stage+0][3]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + fr.Butterfly(&a[5], &a[7]) + a[3].Mul(&a[3], &twiddles[stage+1][1]) + a[7].Mul(&a[7], &twiddles[stage+1][1]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) +} diff --git a/ecc/bls12-378/fr/fft/fft_test.go b/ecc/bls12-378/fr/fft/fft_test.go new file mode 100644 index 000000000..c7416fff7 --- /dev/null +++ b/ecc/bls12-378/fr/fft/fft_test.go @@ -0,0 +1,413 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/big" + "strconv" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" +) + +func TestFFT(t *testing.T) { + const maxSize = 1 << 10 + + nbCosets := 3 + domainWithPrecompute := NewDomain(maxSize, 2, true) + domainWOPrecompute := NewDomain(maxSize, 2, false) + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 5 + + properties := gopter.NewProperties(parameters) + + properties.Property("DIF FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 0) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets with precomputed values should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is 
consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). + Mul(&sample, &domainWithPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets W/O precompute should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWOPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). 
+ Mul(&sample, &domainWOPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIT FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + domainWithPrecompute.FFTInverse(pol, DIF, 0) + BitReverse(pol) + + check := true + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + return check + }, + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, uint64(i)) + domainWithPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + 
+ properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWOPrecompute.FFT(pol, DIT, uint64(i)) + domainWOPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 0) + domainWithPrecompute.FFT(pol, DIT, 0) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 1) + domainWithPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFTInverse(pol, DIF, 1) + domainWOPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = 
check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// -------------------------------------------------------------------- +// benches +func BenchmarkBitReverse(b *testing.B) { + + const maxSize = 1 << 20 + + pol := make([]fr.Element, maxSize) + pol[0].SetRandom() + for i := 1; i < maxSize; i++ { + pol[i] = pol[i-1] + } + + for i := 8; i < 20; i++ { + b.Run("bit reversing 2**"+strconv.Itoa(i)+"bits", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + BitReverse(pol[:1< (1 << 15) { + size = 1 << 15 + } + paddedSize := ecc.NextPowerOfTwo(uint64(size)) + p1 := make([]fr.Element, paddedSize) + p2 := make([]fr.Element, paddedSize) + for i := 0; i < len(p1); i++ { + p1[i].SetRawBytes(r) + } + copy(p2, p1) + + // fft domain + nbCosets := uint64(uint8(data[0]) % 3) + domainWithPrecompute := NewDomain(paddedSize, nbCosets, true) + domainWOPrecompute := NewDomain(paddedSize, nbCosets, false) + + // bitReverse(DIF FFT(DIT FFT (bitReverse))))==id + for i := uint64(0); i < nbCosets; i++ { + BitReverse(p1) + domainWithPrecompute.FFT(p1, DIT, i) + domainWOPrecompute.FFTInverse(p1, DIF, i) + BitReverse(p1) + + for i := 0; i < len(p1); i++ { + if !p1[i].Equal(&p2[i]) { + panic(fmt.Sprintf("bitReverse(DIF FFT(DIT FFT (bitReverse)))) != id, size %d", size)) + } + } + } + + return fuzzNormal +} diff --git a/ecc/bls12-378/fr/fft/fuzz_test.go b/ecc/bls12-378/fr/fft/fuzz_test.go new file mode 100644 index 000000000..9890547c0 --- /dev/null +++ b/ecc/bls12-378/fr/fft/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bls12-378/fr/kzg/doc.go b/ecc/bls12-378/fr/kzg/doc.go new file mode 100644 index 000000000..d8a77e8f6 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package kzg provides a KZG commitment scheme. +package kzg diff --git a/ecc/bls12-378/fr/kzg/fuzz.go b/ecc/bls12-378/fr/kzg/fuzz.go new file mode 100644 index 000000000..0418cc962 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/fuzz.go @@ -0,0 +1,84 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + if len(data) == 0 { + return fuzzDiscard + } + size := int(uint8(data[0])) + 2 // TODO fix min size in NewScheme + if size > (1 << 15) { + size = 1 << 15 + } + r := bytes.NewReader(data[1:]) + var alpha, point fr.Element + alpha.SetRawBytes(r) + point.SetRawBytes(r) + s := NewScheme(size, alpha) + + // create polynomials + f := make([]polynomial.Polynomial, size/2) + for i := 0; i < len(f); i++ { + f[i] = make(polynomial.Polynomial, size) + for j := 0; j < len(f[i]); j++ { + f[i][j].SetRawBytes(r) + } + } + + // commit the polynomials + digests := make([]Digest, size/2) + for i := 0; i < len(digests); i++ { + digests[i], _ = s.Commit(f[i]) + + } + + proof, err := s.BatchOpenSinglePoint(&point, digests, f) 
+ if err != nil { + panic(err) + } + + // verify the claimed values + for i := 0; i < len(f); i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + panic("inconsistant claimed values") + } + } + + // verify correct proof + err = s.BatchVerifySinglePoint(digests, &proof) + if err != nil { + panic(err) + } + + return fuzzNormal +} diff --git a/ecc/bls12-378/fr/kzg/fuzz_test.go b/ecc/bls12-378/fr/kzg/fuzz_test.go new file mode 100644 index 000000000..8379a59c7 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bls12-378/fr/kzg/kzg.go b/ecc/bls12-378/fr/kzg/kzg.go new file mode 100644 index 000000000..9a42a84d2 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/kzg.go @@ -0,0 +1,518 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "errors" + "hash" + "math/big" + "sync" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" + "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrInvalidNbDigests = errors.New("number of digests is not the same as the number of polynomials") + ErrInvalidPolynomialSize = errors.New("invalid polynomial size (larger than SRS or == 0)") + ErrVerifyOpeningProof = errors.New("can't verify opening proof") + ErrVerifyBatchOpeningSinglePoint = errors.New("can't verify batch opening proof at single point") + ErrMinSRSSize = errors.New("minimum srs size is 2") +) + +// Digest commitment of a polynomial. +type Digest = bls12378.G1Affine + +// SRS stores the result of the MPC +type SRS struct { + G1 []bls12378.G1Affine // [gen [alpha]gen , [alpha**2]gen, ... ] + G2 [2]bls12378.G2Affine // [gen, [alpha]gen ] +} + +// NewSRS returns a new SRS using alpha as randomness source +// +// In production, a SRS generated through MPC should be used. 
+// +// implements io.ReaderFrom and io.WriterTo +func NewSRS(size uint64, bAlpha *big.Int) (*SRS, error) { + if size < 2 { + return nil, ErrMinSRSSize + } + var srs SRS + srs.G1 = make([]bls12378.G1Affine, size) + + var alpha fr.Element + alpha.SetBigInt(bAlpha) + + _, _, gen1Aff, gen2Aff := bls12378.Generators() + srs.G1[0] = gen1Aff + srs.G2[0] = gen2Aff + srs.G2[1].ScalarMultiplication(&gen2Aff, bAlpha) + + alphas := make([]fr.Element, size-1) + alphas[0] = alpha + for i := 1; i < len(alphas); i++ { + alphas[i].Mul(&alphas[i-1], &alpha) + } + for i := 0; i < len(alphas); i++ { + alphas[i].FromMont() + } + g1s := bls12378.BatchScalarMultiplicationG1(&gen1Aff, alphas) + copy(srs.G1[1:], g1s) + + return &srs, nil +} + +// OpeningProof KZG proof for opening at a single point. +// +// implements io.ReaderFrom and io.WriterTo +type OpeningProof struct { + // H quotient polynomial (f - f(z))/(x-z) + H bls12378.G1Affine + + // Point at which the polynomial is evaluated + Point fr.Element + + // ClaimedValue purported value + ClaimedValue fr.Element +} + +// BatchOpeningProof opening proof for many polynomials at the same point +// +// implements io.ReaderFrom and io.WriterTo +type BatchOpeningProof struct { + // H quotient polynomial Sum_i gamma**i*(f - f(z))/(x-z) + H bls12378.G1Affine + + // Point at which the polynomials are evaluated + Point fr.Element + + // ClaimedValues purported values + ClaimedValues []fr.Element +} + +// Commit commits to a polynomial using a multi exponentiation with the SRS. +// It is assumed that the polynomial is in canonical form, in Montgomery form. 
+func Commit(p polynomial.Polynomial, srs *SRS, nbTasks ...int) (Digest, error) { + + if len(p) == 0 || len(p) > len(srs.G1) { + return Digest{}, ErrInvalidPolynomialSize + } + + var res bls12378.G1Affine + + config := ecc.MultiExpConfig{ScalarsMont: true} + if len(nbTasks) > 0 { + config.NbTasks = nbTasks[0] + } + if _, err := res.MultiExp(srs.G1[:len(p)], p, config); err != nil { + return Digest{}, err + } + + return res, nil +} + +// Open computes an opening proof of polynomial p at given point. +// fft.Domain Cardinality must be larger than p.Degree() +func Open(p polynomial.Polynomial, point *fr.Element, domain *fft.Domain, srs *SRS) (OpeningProof, error) { + if len(p) == 0 || len(p) > len(srs.G1) { + return OpeningProof{}, ErrInvalidPolynomialSize + } + + // build the proof + res := OpeningProof{ + Point: *point, + ClaimedValue: p.Eval(point), + } + + // compute H + _p := make(polynomial.Polynomial, len(p)) + copy(_p, p) + h := dividePolyByXminusA(_p, res.ClaimedValue, res.Point) + + _p = nil // h re-use this memory + + // commit to H + hCommit, err := Commit(h, srs) + if err != nil { + return OpeningProof{}, err + } + res.H.Set(&hCommit) + + return res, nil +} + +// Verify verifies a KZG opening proof at a single point +func Verify(commitment *Digest, proof *OpeningProof, srs *SRS) error { + + // comm(f(a)) + var claimedValueG1Aff bls12378.G1Affine + var claimedValueBigInt big.Int + proof.ClaimedValue.ToBigIntRegular(&claimedValueBigInt) + claimedValueG1Aff.ScalarMultiplication(&srs.G1[0], &claimedValueBigInt) + + // [f(alpha) - f(a)]G1Jac + var fminusfaG1Jac, tmpG1Jac bls12378.G1Jac + fminusfaG1Jac.FromAffine(commitment) + tmpG1Jac.FromAffine(&claimedValueG1Aff) + fminusfaG1Jac.SubAssign(&tmpG1Jac) + + // [-H(alpha)]G1Aff + var negH bls12378.G1Affine + negH.Neg(&proof.H) + + // [alpha-a]G2Jac + var alphaMinusaG2Jac, genG2Jac, alphaG2Jac bls12378.G2Jac + var pointBigInt big.Int + proof.Point.ToBigIntRegular(&pointBigInt) + genG2Jac.FromAffine(&srs.G2[0]) + 
alphaG2Jac.FromAffine(&srs.G2[1])
+	alphaMinusaG2Jac.ScalarMultiplication(&genG2Jac, &pointBigInt).
+		Neg(&alphaMinusaG2Jac).
+		AddAssign(&alphaG2Jac)
+
+	// [alpha-a]G2Aff
+	var xminusaG2Aff bls12378.G2Affine
+	xminusaG2Aff.FromJacobian(&alphaMinusaG2Jac)
+
+	// [f(alpha) - f(a)]G1Aff
+	var fminusfaG1Aff bls12378.G1Affine
+	fminusfaG1Aff.FromJacobian(&fminusfaG1Jac)
+
+	// e([f(alpha)-f(a)]G1Aff, G2gen).e([-H(alpha)]G1Aff, [alpha-a]G2Aff) ==? 1
+	check, err := bls12378.PairingCheck(
+		[]bls12378.G1Affine{fminusfaG1Aff, negH},
+		[]bls12378.G2Affine{srs.G2[0], xminusaG2Aff},
+	)
+	if err != nil {
+		return err
+	}
+	if !check {
+		return ErrVerifyOpeningProof
+	}
+	return nil
+}
+
+// BatchOpenSinglePoint creates a batch opening proof at point of a list of polynomials.
+// It's an interactive protocol, made non interactive using Fiat Shamir.
+// point is the point at which the polynomials are opened.
+// digests is the list of committed polynomials to open, needed to derive the challenge using Fiat Shamir.
+// polynomials is the list of polynomials to open.
+func BatchOpenSinglePoint(polynomials []polynomial.Polynomial, digests []Digest, point *fr.Element, hf hash.Hash, domain *fft.Domain, srs *SRS) (BatchOpeningProof, error) { + + // check for invalid sizes + nbDigests := len(digests) + if nbDigests != len(polynomials) { + return BatchOpeningProof{}, ErrInvalidNbDigests + } + largestPoly := -1 + for _, p := range polynomials { + if len(p) == 0 || len(p) > len(srs.G1) { + return BatchOpeningProof{}, ErrInvalidPolynomialSize + } + if len(p) > largestPoly { + largestPoly = len(p) + } + } + + var res BatchOpeningProof + + // compute the purported values + res.ClaimedValues = make([]fr.Element, len(polynomials)) + var wg sync.WaitGroup + wg.Add(len(polynomials)) + for i := 0; i < len(polynomials); i++ { + go func(at int) { + res.ClaimedValues[at] = polynomials[at].Eval(point) + wg.Done() + }(i) + } + + // set the point at which the evaluation is done + res.Point = *point + + // derive the challenge gamma, binded to the point and the commitments + gamma, err := deriveGamma(res.Point, digests, hf) + if err != nil { + return BatchOpeningProof{}, err + } + + // compute sum_i gamma**i*f(a) + var sumGammaiTimesEval fr.Element + chSumGammai := make(chan struct{}, 1) + go func() { + // wait for polynomial evaluations to be completed (res.ClaimedValues) + wg.Wait() + sumGammaiTimesEval = res.ClaimedValues[nbDigests-1] + for i := nbDigests - 2; i >= 0; i-- { + sumGammaiTimesEval.Mul(&sumGammaiTimesEval, &gamma). + Add(&sumGammaiTimesEval, &res.ClaimedValues[i]) + } + close(chSumGammai) + }() + + // compute sum_i gamma**i*f + // that is p0 + gamma * p1 + gamma^2 * p2 + ... 
gamma^n * pn
+	// note: if we are willing to parallelize that, we could clone the poly and scale them by
+	// gamma n in parallel, before reducing into sumGammaiTimesPol
+	sumGammaiTimesPol := make(polynomial.Polynomial, largestPoly)
+	copy(sumGammaiTimesPol, polynomials[0])
+	gammaN := gamma
+	var pj fr.Element
+	for i := 1; i < len(polynomials); i++ {
+		for j := 0; j < len(polynomials[i]); j++ {
+			pj.Mul(&polynomials[i][j], &gammaN)
+			sumGammaiTimesPol[j].Add(&sumGammaiTimesPol[j], &pj)
+		}
+		gammaN.Mul(&gammaN, &gamma)
+	}
+
+	// compute H
+	<-chSumGammai
+	h := dividePolyByXminusA(sumGammaiTimesPol, sumGammaiTimesEval, res.Point)
+	sumGammaiTimesPol = nil // same memory as h
+
+	res.H, err = Commit(h, srs)
+	if err != nil {
+		return BatchOpeningProof{}, err
+	}
+
+	return res, nil
+}
+
+// FoldProof folds the digests and the proofs in batchOpeningProof using Fiat Shamir
+// to obtain an opening proof at a single point.
+//
+// * digests list of digests on which batchOpeningProof is based
+// * batchOpeningProof opening proof of digests
+// * returns the folded version of batchOpeningProof, Digest, the folded version of digests
+func FoldProof(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash) (OpeningProof, Digest, error) {
+
+	nbDigests := len(digests)
+
+	// check consistency between the number of claims and the number of digests
+	if nbDigests != len(batchOpeningProof.ClaimedValues) {
+		return OpeningProof{}, Digest{}, ErrInvalidNbDigests
+	}
+
+	// derive the challenge gamma, bound to the point and the commitments
+	gamma, err := deriveGamma(batchOpeningProof.Point, digests, hf)
+	if err != nil {
+		// propagate the Fiat-Shamir failure instead of masking it with an unrelated sentinel
+		return OpeningProof{}, Digest{}, err
+	}
+
+	// fold the claimed values and digests
+	gammai := make([]fr.Element, nbDigests)
+	gammai[0].SetOne()
+	for i := 1; i < nbDigests; i++ {
+		gammai[i].Mul(&gammai[i-1], &gamma)
+	}
+	foldedDigests, foldedEvaluations, err := fold(digests, batchOpeningProof.ClaimedValues, gammai)
+	if err != nil {
+		return OpeningProof{}, Digest{}, err
+	}
+
+	// create the folded opening proof
+	var res OpeningProof
+	res.ClaimedValue.Set(&foldedEvaluations)
+	res.H.Set(&batchOpeningProof.H)
+	res.Point.Set(&batchOpeningProof.Point)
+
+	return res, foldedDigests, nil
+}
+
+// BatchVerifySinglePoint verifies a batched opening proof at a single point of a list of polynomials.
+//
+// * digests list of digests on which opening proof is done
+// * batchOpeningProof proof of correct opening on the digests
+func BatchVerifySinglePoint(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash, srs *SRS) error {
+
+	// fold the proof
+	foldedProof, foldedDigest, err := FoldProof(digests, batchOpeningProof, hf)
+	if err != nil {
+		return err
+	}
+
+	// verify the foldedProof against the foldedDigest
+	err = Verify(&foldedDigest, &foldedProof, srs)
+	return err
+
+}
+
+// BatchVerifyMultiPoints batch verifies a list of opening proofs at different points.
+// The purpose of the batching is to have only one pairing for verifying several proofs.
+//
+// * digests list of committed polynomials which are opened
+// * proofs list of opening proofs of the digest
+func BatchVerifyMultiPoints(digests []Digest, proofs []OpeningProof, srs *SRS) error {
+
+	// check consistency: nb proofs vs nb digests
+	if len(digests) != len(proofs) {
+		return ErrInvalidNbDigests
+	}
+
+	// if only one digest, call Verify
+	if len(digests) == 1 {
+		return Verify(&digests[0], &proofs[0], srs)
+	}
+
+	// sample random coefficients for the random linear combination of the proofs
+	randomNumbers := make([]fr.Element, len(digests))
+	randomNumbers[0].SetOne()
+	for i := 1; i < len(randomNumbers); i++ {
+		_, err := randomNumbers[i].SetRandom()
+		if err != nil {
+			return err
+		}
+	}
+
+	// combine random_i*quotient_i
+	var foldedQuotients bls12378.G1Affine
+	quotients := make([]bls12378.G1Affine, len(proofs))
+	for i := 0; i < len(randomNumbers); i++ {
+		quotients[i].Set(&proofs[i].H)
+	}
+	config := ecc.MultiExpConfig{ScalarsMont: true}
+	_, err := foldedQuotients.MultiExp(quotients, randomNumbers, config)
+	if err != nil {
+		// a failed multi-exponentiation must fail the verification, not return success
+		return err
+	}
+
+	// fold digests and evals
+	evals := make([]fr.Element, len(digests))
+	for i := 0; i < len(randomNumbers); i++ {
+		evals[i].Set(&proofs[i].ClaimedValue)
+	}
+	foldedDigests, foldedEvals, err := fold(digests, evals, randomNumbers)
+	if err != nil {
+		return err
+	}
+
+	// compute commitment to folded Eval
+	var foldedEvalsCommit bls12378.G1Affine
+	var foldedEvalsBigInt big.Int
+	foldedEvals.ToBigIntRegular(&foldedEvalsBigInt)
+	foldedEvalsCommit.ScalarMultiplication(&srs.G1[0], &foldedEvalsBigInt)
+
+	// compute F = foldedDigests - foldedEvalsCommit
+	foldedDigests.Sub(&foldedDigests, &foldedEvalsCommit)
+
+	// combine random_i*(point_i*quotient_i)
+	var foldedPointsQuotients bls12378.G1Affine
+	for i := 0; i < len(randomNumbers); i++ {
+		randomNumbers[i].Mul(&randomNumbers[i], &proofs[i].Point)
+	}
+	_, err = foldedPointsQuotients.MultiExp(quotients, randomNumbers, config)
+	if err != nil {
+		return err
+	}
+
+	// lhs first pairing
+	foldedDigests.Add(&foldedDigests, &foldedPointsQuotients)
+
+	// lhs second pairing
+	foldedQuotients.Neg(&foldedQuotients)
+
+	// pairing check
+	check, err := bls12378.PairingCheck(
+		[]bls12378.G1Affine{foldedDigests, foldedQuotients},
+		[]bls12378.G2Affine{srs.G2[0], srs.G2[1]},
+	)
+	if err != nil {
+		return err
+	}
+	if !check {
+		return ErrVerifyOpeningProof
+	}
+	return nil
+
+}
+
+// fold folds digests and evaluations using the list of factors as random numbers.
+//
+// * digests list of digests to fold
+// * evaluations list of evaluations to fold
+// * factors list of multiplicative factors used for the folding (in Montgomery form)
+func fold(digests []Digest, evaluations []fr.Element, factors []fr.Element) (Digest, fr.Element, error) {
+
+	// length consistency between digests and evaluations should have been checked before calling this function
+	nbDigests := len(digests)
+
+	// fold the claimed values
+	var foldedEvaluations, tmp fr.Element
+	for i := 0; i < nbDigests; i++ {
+		tmp.Mul(&evaluations[i], &factors[i])
+		foldedEvaluations.Add(&foldedEvaluations, &tmp)
+	}
+
+	// fold the digests
+	var foldedDigests Digest
+	_, err := foldedDigests.MultiExp(digests, factors, ecc.MultiExpConfig{ScalarsMont: true})
+	if err != nil {
+		return foldedDigests, foldedEvaluations, err
+	}
+
+	// folding done
+	return foldedDigests, foldedEvaluations, nil
+
+}
+
+// deriveGamma derives a challenge using Fiat Shamir to fold proofs.
+func deriveGamma(point fr.Element, digests []Digest, hf hash.Hash) (fr.Element, error) { + + // derive the challenge gamma, binded to the point and the commitments + fs := fiatshamir.NewTranscript(hf, "gamma") + if err := fs.Bind("gamma", point.Marshal()); err != nil { + return fr.Element{}, err + } + for i := 0; i < len(digests); i++ { + if err := fs.Bind("gamma", digests[i].Marshal()); err != nil { + return fr.Element{}, err + } + } + gammaByte, err := fs.ComputeChallenge("gamma") + if err != nil { + return fr.Element{}, err + } + var gamma fr.Element + gamma.SetBytes(gammaByte) + + return gamma, nil +} + +// dividePolyByXminusA computes (f-f(a))/(x-a), in canonical basis, in regular form +// f memory is re-used for the result +func dividePolyByXminusA(f polynomial.Polynomial, fa, a fr.Element) polynomial.Polynomial { + + // first we compute f-f(a) + f[0].Sub(&f[0], &fa) + + // now we use syntetic division to divide by x-a + var t fr.Element + for i := len(f) - 2; i >= 0; i-- { + t.Mul(&f[i+1], &a) + + f[i].Add(&f[i], &t) + } + + // the result is of degree deg(f)-1 + return f[1:] +} diff --git a/ecc/bls12-378/fr/kzg/kzg_test.go b/ecc/bls12-378/fr/kzg/kzg_test.go new file mode 100644 index 000000000..837f2c305 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/kzg_test.go @@ -0,0 +1,453 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "crypto/sha256" + "math/big" + "reflect" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" +) + +// testSRS re-used accross tests of the KZG scheme +var testSRS *SRS + +func init() { + const srsSize = 230 + testSRS, _ = NewSRS(ecc.NextPowerOfTwo(srsSize), new(big.Int).SetInt64(42)) +} + +func TestDividePolyByXminusA(t *testing.T) { + + const pSize = 230 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + + // evaluate the polynomial at a random point + var point fr.Element + point.SetRandom() + evaluation := pol.Eval(&point) + + // probabilistic test (using Schwartz Zippel lemma, evaluation at one point is enough) + var randPoint, xminusa fr.Element + randPoint.SetRandom() + polRandpoint := pol.Eval(&randPoint) + polRandpoint.Sub(&polRandpoint, &evaluation) // f(rand)-f(point) + + // compute f-f(a)/x-a + h := dividePolyByXminusA(pol, evaluation, point) + pol = nil // h reuses this memory + + if len(h) != 229 { + t.Fatal("inconsistant size of quotient") + } + + hRandPoint := h.Eval(&randPoint) + xminusa.Sub(&randPoint, &point) // rand-point + + // f(rand)-f(point) ==? h(rand)*(rand-point) + hRandPoint.Mul(&hRandPoint, &xminusa) + + if !hRandPoint.Equal(&polRandpoint) { + t.Fatal("Error f-f(a)/x-a") + } +} + +func TestSerializationSRS(t *testing.T) { + + // create a SRS + srs, err := NewSRS(64, new(big.Int).SetInt64(42)) + if err != nil { + t.Fatal(err) + } + + // serialize it... 
+ var buf bytes.Buffer + _, err = srs.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + + // reconstruct the SRS + var _srs SRS + _, err = _srs.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + // compare + if !reflect.DeepEqual(srs, &_srs) { + t.Fatal("scheme serialization failed") + } + +} + +func TestCommit(t *testing.T) { + + // create a polynomial + f := make(polynomial.Polynomial, 60) + for i := 0; i < 60; i++ { + f[i].SetRandom() + } + + // commit using the method from KZG + _kzgCommit, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + var kzgCommit bls12378.G1Affine + kzgCommit.Unmarshal(_kzgCommit.Marshal()) + + // check commitment using manual commit + var x fr.Element + x.SetString("42") + fx := f.Eval(&x) + var fxbi big.Int + fx.ToBigIntRegular(&fxbi) + var manualCommit bls12378.G1Affine + manualCommit.Set(&testSRS.G1[0]) + manualCommit.ScalarMultiplication(&manualCommit, &fxbi) + + // compare both results + if !kzgCommit.Equal(&manualCommit) { + t.Fatal("error KZG commitment") + } + +} + +func TestVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create a polynomial + f := randomPolynomial(60) + + // commit the polynomial + digest, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := Open(f, &point, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed valued + expected := f.Eval(&point) + if !proof.ClaimedValue.Equal(&expected) { + t.Fatal("inconsistant claimed value") + } + + // verify correct proof + err = Verify(&digest, &proof, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValue.Double(&proof.ClaimedValue) + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } +} + +func TestBatchVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 
0, false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + + } + + // pick a hash function + hf := sha256.New() + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := BatchOpenSinglePoint(f, digests, &point, hf, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed values + for i := 0; i < 10; i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + t.Fatal("inconsistant claimed values") + } + } + + // verify correct proof + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + +} + +func TestBatchVerifyMultiPoints(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + } + + // pick a hash function + hf := sha256.New() + + // compute 2 batch opening proofs at 2 random points + points := make([]fr.Element, 2) + batchProofs := make([]BatchOpeningProof, 2) + points[0].SetRandom() + batchProofs[0], _ = BatchOpenSinglePoint(f[:5], digests[:5], &points[0], hf, domain, testSRS) + points[1].SetRandom() + batchProofs[1], _ = BatchOpenSinglePoint(f[5:], digests[5:], &points[1], hf, domain, testSRS) + + // fold the 2 batch opening proofs + proofs := make([]OpeningProof, 2) + foldedDigests := make([]Digest, 2) + 
proofs[0], foldedDigests[0], _ = FoldProof(digests[:5], &batchProofs[0], hf) + proofs[1], foldedDigests[1], _ = FoldProof(digests[5:], &batchProofs[1], hf) + + // check the the individual batch proofs are correct + err := Verify(&foldedDigests[0], &proofs[0], testSRS) + if err != nil { + t.Fatal(err) + } + err = Verify(&foldedDigests[1], &proofs[1], testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify correct folded proofs + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify tampered folded proofs + proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } + +} + +const benchSize = 1 << 16 + +func BenchmarkKZGCommit(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // random polynomial + p := randomPolynomial(benchSize / 2) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Commit(p, benchSRS) + } +} + +func BenchmarkDivideByXMinusA(b *testing.B) { + const pSize = 1 << 22 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + var a, fa fr.Element + a.SetRandom() + fa.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dividePolyByXminusA(pol, fa, a) + pol = pol[:pSize] + pol[pSize-1] = pol[0] + } +} + +func BenchmarkKZGOpen(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Open(p, &r, domain, benchSRS) + } +} + +func BenchmarkKZGVerify(b *testing.B) { + benchSRS, err := 
NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // kzg scheme + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + // commit + comm, err := Commit(p, benchSRS) + if err != nil { + b.Fatal(err) + } + + // open + openingProof, err := Open(p, &r, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Verify(&comm, &openingProof, benchSRS) + } +} + +func BenchmarkKZGBatchOpen10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + } +} + +func BenchmarkKZGBatchVerify10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + proof, err := BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; 
i++ { + BatchVerifySinglePoint(commitments[:], &proof, hf, benchSRS) + } +} + +func randomPolynomial(size int) polynomial.Polynomial { + f := make(polynomial.Polynomial, size) + for i := 0; i < size; i++ { + f[i].SetRandom() + } + return f +} diff --git a/ecc/bls12-378/fr/kzg/marshal.go b/ecc/bls12-378/fr/kzg/marshal.go new file mode 100644 index 000000000..9805f2fb1 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/marshal.go @@ -0,0 +1,138 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378" + "io" +) + +// WriteTo writes binary encoding of the SRS +func (srs *SRS) WriteTo(w io.Writer) (int64, error) { + // encode the SRS + enc := bls12378.NewEncoder(w) + + toEncode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + srs.G1, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes SRS data from reader. 
+func (srs *SRS) ReadFrom(r io.Reader) (int64, error) { + // decode the SRS + dec := bls12378.NewDecoder(r) + + toDecode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + &srs.G1, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a OpeningProof +func (proof *OpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bls12378.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes OpeningProof data from reader. +func (proof *OpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bls12378.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a BatchOpeningProof +func (proof *BatchOpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bls12378.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + proof.ClaimedValues, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes BatchOpeningProof data from reader. 
+func (proof *BatchOpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bls12378.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValues, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} diff --git a/ecc/bls12-378/fr/mimc/doc.go b/ecc/bls12-378/fr/mimc/doc.go new file mode 100644 index 000000000..497bd40a9 --- /dev/null +++ b/ecc/bls12-378/fr/mimc/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package mimc provides MiMC hash function using Miyaguchi–Preneel construction. +package mimc diff --git a/ecc/bls12-378/fr/mimc/fuzz.go b/ecc/bls12-378/fr/mimc/fuzz.go new file mode 100644 index 000000000..41b557cf3 --- /dev/null +++ b/ecc/bls12-378/fr/mimc/fuzz.go @@ -0,0 +1,34 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + var s []byte + h := NewMiMC(string(data)) + h.Write(data) + h.Sum(s) + return fuzzNormal +} diff --git a/ecc/bls12-378/fr/mimc/mimc.go b/ecc/bls12-378/fr/mimc/mimc.go new file mode 100644 index 000000000..b149eb88e --- /dev/null +++ b/ecc/bls12-378/fr/mimc/mimc.go @@ -0,0 +1,174 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +import ( + "hash" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "golang.org/x/crypto/sha3" +) + +const mimcNbRounds = 91 + +// BlockSize size that mimc consumes +const BlockSize = fr.Bytes + +// Params constants for the mimc hash function +type Params []fr.Element + +// NewParams creates new mimc object +func NewParams(seed string) Params { + + // set the constants + res := make(Params, mimcNbRounds) + + rnd := sha3.Sum256([]byte(seed)) + value := new(big.Int).SetBytes(rnd[:]) + + for i := 0; i < mimcNbRounds; i++ { + rnd = sha3.Sum256(value.Bytes()) + value.SetBytes(rnd[:]) + res[i].SetBigInt(value) + } + + return res +} + +// digest represents the partial evaluation of the checksum +// along with the params of the mimc function +type digest struct { + Params Params + h fr.Element + data []byte // data to hash +} + +// NewMiMC returns a MiMCImpl object, pure-go reference implementation +func NewMiMC(seed string) hash.Hash { + d := new(digest) + params := NewParams(seed) + //d.Reset() + d.Params = params + d.Reset() + return d +} + +// Reset resets the Hash to its initial state. +func (d *digest) Reset() { + d.data = nil + d.h = fr.Element{0, 0, 0, 0} +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (d *digest) Sum(b []byte) []byte { + buffer := d.checksum() + d.data = nil // flush the data already hashed + hash := buffer.Bytes() + b = append(b, hash[:]...) + return b +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (d *digest) Size() int { + return BlockSize +} + +// BlockSize returns the number of bytes Sum will return. 
+func (d *digest) BlockSize() int { + return BlockSize +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// It never returns an error. +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + d.data = append(d.data, p...) + return +} + +// Hash hash using Miyaguchi–Preneel: +// https://en.wikipedia.org/wiki/One-way_compression_function +// The XOR operation is replaced by field addition, data is in Montgomery form +func (d *digest) checksum() fr.Element { + + var buffer [BlockSize]byte + var x fr.Element + + // if data size is not multiple of BlockSizes we padd: + // .. || 0xaf8 -> .. || 0x0000...0af8 + if len(d.data)%BlockSize != 0 { + q := len(d.data) / BlockSize + r := len(d.data) % BlockSize + sliceq := make([]byte, q*BlockSize) + copy(sliceq, d.data) + slicer := make([]byte, r) + copy(slicer, d.data[q*BlockSize:]) + sliceremainder := make([]byte, BlockSize-r) + d.data = append(sliceq, sliceremainder...) + d.data = append(d.data, slicer...) + } + + if len(d.data) == 0 { + d.data = make([]byte, 32) + } + + nbChunks := len(d.data) / BlockSize + + for i := 0; i < nbChunks; i++ { + copy(buffer[:], d.data[i*BlockSize:(i+1)*BlockSize]) + x.SetBytes(buffer[:]) + d.encrypt(x) + d.h.Add(&x, &d.h) + } + + return d.h +} + +// plain execution of a mimc run +// m: message +// k: encryption key +func (d *digest) encrypt(m fr.Element) { + + for i := 0; i < len(d.Params); i++ { + // m = (m+k+c)^5 + var tmp fr.Element + tmp.Add(&m, &d.h).Add(&tmp, &d.Params[i]) + m.Square(&tmp). + Square(&m). 
+ Mul(&m, &tmp) + } + m.Add(&m, &d.h) + d.h = m +} + +// Sum computes the mimc hash of msg from seed +func Sum(seed string, msg []byte) ([]byte, error) { + params := NewParams(seed) + var d digest + d.Params = params + if _, err := d.Write(msg); err != nil { + return nil, err + } + h := d.checksum() + bytes := h.Bytes() + return bytes[:], nil +} diff --git a/ecc/bls12-378/fr/permutation/doc.go b/ecc/bls12-378/fr/permutation/doc.go new file mode 100644 index 000000000..bdf98e6ca --- /dev/null +++ b/ecc/bls12-378/fr/permutation/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package permutation provides an API to build permutation proofs. +package permutation diff --git a/ecc/bls12-378/fr/permutation/permutation.go b/ecc/bls12-378/fr/permutation/permutation.go new file mode 100644 index 000000000..a51f4ffd1 --- /dev/null +++ b/ecc/bls12-378/fr/permutation/permutation.go @@ -0,0 +1,361 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// Proof is a permutation proof: it attests that the committed vectors t1
// and t2 contain the same entries, up to a permutation.
type Proof struct {

	// size of the polynomials (cardinality of the evaluation domain)
	size int

	// commitments of t1 & t2, the permuted vectors, and z, the accumulation
	// polynomial
	t1, t2, z kzg.Digest

	// commitment to the quotient polynomial
	q kzg.Digest

	// batched opening proofs of t1, t2, z, q at the evaluation challenge
	// (in that order)
	batchedProof kzg.BatchOpeningProof

	// opening proof of z at the challenge shifted by the domain generator
	shiftedProof kzg.OpeningProof
}
+func computeZ(lt1, lt2 []fr.Element, epsilon fr.Element) []fr.Element { + + s := len(lt1) + z := make([]fr.Element, s) + d := make([]fr.Element, s) + z[0].SetOne() + d[0].SetOne() + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + var t fr.Element + for i := 0; i < s-1; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + z[_ii].Mul(&z[_i], t.Sub(&epsilon, <1[i])) + d[i+1].Mul(&d[i], t.Sub(&epsilon, <2[i])) + } + d = fr.BatchInvert(d) + for i := 0; i < s-1; i++ { + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + z[_ii].Mul(&z[_ii], &d[i+1]) + } + + return z +} + +// computeH computes lt2*z(gx) - lt1*z +func computeH(lt1, lt2, lz []fr.Element, epsilon fr.Element) []fr.Element { + + s := len(lt1) + res := make([]fr.Element, s) + var a, b fr.Element + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + for i := 0; i < s; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + a.Sub(&epsilon, <2[_i]) + a.Mul(&lz[_ii], &a) + b.Sub(&epsilon, <1[_i]) + b.Mul(&lz[_i], &b) + res[_i].Sub(&a, &b) + } + return res +} + +// computeH0 computes L0 * (z-1) +func computeH0(lz []fr.Element, d *fft.Domain) []fr.Element { + + var tn, o, g fr.Element + s := len(lz) + tn.SetUint64(2). + Neg(&tn) + u := make([]fr.Element, s) + o.SetOne() + g.Set(&d.FinerGenerator) + for i := 0; i < s; i++ { + u[i].Sub(&g, &o) + g.Mul(&g, &d.Generator) + } + u = fr.BatchInvert(u) + res := make([]fr.Element, s) + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + for i := 0; i < s; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lz[_i], &o). + Mul(&res[_i], &u[i]). + Mul(&res[_i], &tn) + } + return res +} + +// Prove generates a proof that t1 and t2 are the same but permuted. +// The size of t1 and t2 should be the same and a power of 2. 
// Prove generates a proof that t1 and t2 are the same but permuted.
// The size of t1 and t2 should be the same and a power of 2.
//
// The proof commits t1, t2, the accumulation polynomial z, and the quotient
// q of the folded numerator by x^n-1, then opens all four at a Fiat-Shamir
// challenge (plus z at the challenge shifted by the domain generator).
func Prove(srs *kzg.SRS, t1, t2 []fr.Element) (Proof, error) {

	// res
	var proof Proof
	var err error

	// size checking
	if len(t1) != len(t2) {
		return proof, ErrIncompatibleSize
	}

	// create the domains; a non-power-of-2 size is rejected because
	// NewDomain rounds the cardinality up
	// NOTE(review): the second argument 1 appears to request an extra
	// coset ("FinerGenerator") — confirm against fft.NewDomain
	d := fft.NewDomain(uint64(len(t1)), 1, false)
	if d.Cardinality != uint64(len(t1)) {
		return proof, ErrSize
	}
	s := int(d.Cardinality)
	proof.size = s

	// hash function for Fiat Shamir
	hFunc := sha256.New()

	// transcript to derive the challenge
	fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta")

	// commit t1, t2: interpolate (FFTInverse) then bit-reverse back to
	// natural ordering before committing the canonical coefficients
	ct1 := make([]fr.Element, s)
	ct2 := make([]fr.Element, s)
	copy(ct1, t1)
	copy(ct2, t2)
	d.FFTInverse(ct1, fft.DIF, 0)
	d.FFTInverse(ct2, fft.DIF, 0)
	fft.BitReverse(ct1)
	fft.BitReverse(ct2)
	proof.t1, err = kzg.Commit(ct1, srs)
	if err != nil {
		return proof, err
	}
	proof.t2, err = kzg.Commit(ct2, srs)
	if err != nil {
		return proof, err
	}

	// derive challenge for z, bound to both commitments
	epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2)
	if err != nil {
		return proof, err
	}

	// compute Z and commit it (computeZ returns bit-reversed Lagrange
	// values, hence the DIT inverse transform with no extra BitReverse)
	cz := computeZ(t1, t2, epsilon)
	d.FFTInverse(cz, fft.DIT, 0)
	proof.z, err = kzg.Commit(cz, srs)
	if err != nil {
		return proof, err
	}
	lz := make([]fr.Element, s)
	copy(lz, cz)
	d.FFT(lz, fft.DIF, 1)

	// compute the first part of the numerator, evaluated on the coset
	lt1 := make([]fr.Element, s)
	lt2 := make([]fr.Element, s)
	copy(lt1, ct1)
	copy(lt2, ct2)
	d.FFT(lt1, fft.DIF, 1)
	d.FFT(lt2, fft.DIF, 1)
	h := computeH(lt1, lt2, lz, epsilon)

	// compute second part of the numerator
	h0 := computeH0(lz, d)

	// derive challenge used for the folding
	omega, err := deriveRandomness(&fs, "omega", &proof.z)
	if err != nil {
		return proof, err
	}

	// fold the numerator and divide it by x^n-1
	// (on this coset x^n-1 evaluates to -2, hence t = (-2)^-1)
	var t fr.Element
	t.SetUint64(2).Neg(&t).Inverse(&t)
	for i := 0; i < s; i++ {
		h0[i].Mul(&omega, &h0[i]).
			Add(&h0[i], &h[i]).
			Mul(&h0[i], &t)
	}

	// get the quotient and commit it
	d.FFTInverse(h0, fft.DIT, 1)
	proof.q, err = kzg.Commit(h0, srs)
	if err != nil {
		return proof, err
	}

	// derive the evaluation challenge
	eta, err := deriveRandomness(&fs, "eta", &proof.q)
	if err != nil {
		return proof, err
	}

	// compute the opening proofs of all four polynomials at eta
	proof.batchedProof, err = kzg.BatchOpenSinglePoint(
		[]polynomial.Polynomial{
			ct1,
			ct2,
			cz,
			h0,
		},
		[]kzg.Digest{
			proof.t1,
			proof.t2,
			proof.z,
			proof.q,
		},
		&eta,
		hFunc,
		d,
		srs,
	)
	if err != nil {
		return proof, err
	}

	// open z at g*eta so the verifier can check the accumulation relation
	eta.Mul(&eta, &d.Generator)
	proof.shiftedProof, err = kzg.Open(
		cz,
		&eta,
		d,
		srs,
	)
	if err != nil {
		return proof, err
	}

	// done
	return proof, nil

}
+ Sub(&rhs, &one) + a.Sub(&eta, &one) + l0.Div(&rhs, &a) + rhs.Mul(&rhs, &proof.batchedProof.ClaimedValues[3]) + a.Sub(&epsilon, &proof.batchedProof.ClaimedValues[1]). + Mul(&a, &proof.shiftedProof.ClaimedValue) + b.Sub(&epsilon, &proof.batchedProof.ClaimedValues[0]). + Mul(&b, &proof.batchedProof.ClaimedValues[2]) + lhs.Sub(&a, &b) + a.Sub(&proof.batchedProof.ClaimedValues[2], &one). + Mul(&a, &l0). + Mul(&a, &omega) + lhs.Add(&a, &lhs) + if !lhs.Equal(&rhs) { + return ErrPermutationProof + } + + // check the opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.t1, + proof.t2, + proof.z, + proof.q, + }, + &proof.batchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = kzg.Verify(&proof.z, &proof.shiftedProof, srs) + if err != nil { + return err + } + + return nil +} diff --git a/ecc/bls12-378/fr/permutation/permutation_test.go b/ecc/bls12-378/fr/permutation/permutation_test.go new file mode 100644 index 000000000..9519e05b8 --- /dev/null +++ b/ecc/bls12-378/fr/permutation/permutation_test.go @@ -0,0 +1,94 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package permutation + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" +) + +func TestProof(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + a := make([]fr.Element, 8) + b := make([]fr.Element, 8) + + for i := 0; i < 8; i++ { + a[i].SetUint64(uint64(4*i + 1)) + } + for i := 0; i < 8; i++ { + b[i].Set(&a[(5*i)%8]) + } + + // correct proof + { + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + a[0].SetRandom() + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkProver(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := make([]fr.Element, polySize) + c := make([]fr.Element, polySize) + + for i := 0; i < polySize; i++ { + a[i].SetUint64(uint64(i)) + } + for i := 0; i < polySize; i++ { + c[i].Set(&a[(5*i)%(polySize)]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Prove(srs, a, c) + } + +} diff --git a/ecc/bls12-378/fr/plookup/doc.go b/ecc/bls12-378/fr/plookup/doc.go new file mode 100644 index 000000000..ec4b91287 --- /dev/null +++ b/ecc/bls12-378/fr/plookup/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package plookup provides an API to build plookup proofs. +package plookup diff --git a/ecc/bls12-378/fr/plookup/plookup_test.go b/ecc/bls12-378/fr/plookup/plookup_test.go new file mode 100644 index 000000000..d01a7c8a8 --- /dev/null +++ b/ecc/bls12-378/fr/plookup/plookup_test.go @@ -0,0 +1,139 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" +) + +func TestLookupVector(t *testing.T) { + + lookupVector := make(Table, 8) + fvector := make(Table, 7) + for i := 0; i < 8; i++ { + lookupVector[i].SetUint64(uint64(2 * i)) + } + for i := 0; i < 7; i++ { + fvector[i].Set(&lookupVector[(4*i+1)%8]) + } + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + // correct proof vector + { + proof, err := ProveLookupVector(srs, fvector, lookupVector) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupVector(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proofs vector + { + fvector[0].SetRandom() + + proof, err := ProveLookupVector(srs, fvector, lookupVector) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupVector(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func TestLookupTable(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + lookupTable := make([]Table, 3) + fTable := make([]Table, 3) + for i := 0; i < 3; i++ { + lookupTable[i] = make(Table, 8) + fTable[i] = make(Table, 7) + for j := 0; j < 8; j++ { + lookupTable[i][j].SetUint64(uint64(2*i + j)) + } + for j := 0; j < 7; j++ { + fTable[i][j].Set(&lookupTable[i][(4*j+1)%8]) + } + } + + // correct proof + { + proof, err := ProveLookupTables(srs, fTable, lookupTable) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupTables(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + fTable[0][0].SetRandom() + proof, err := ProveLookupTables(srs, fTable, lookupTable) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupTables(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkPlookup(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := 
make(Table, polySize) + c := make(Table, polySize) + + for i := 0; i < 1<<14; i++ { + a[i].SetUint64(uint64(i)) + c[i].SetUint64(uint64((8 * i) % polySize)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ProveLookupVector(srs, a, c) + } +} diff --git a/ecc/bls12-378/fr/plookup/table.go b/ecc/bls12-378/fr/plookup/table.go new file mode 100644 index 000000000..51d432b51 --- /dev/null +++ b/ecc/bls12-378/fr/plookup/table.go @@ -0,0 +1,252 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// ProofLookupTables proves that a multi-row table f only contains columns
// that appear in the multi-row lookup table t: the rows are committed
// individually, folded with a Fiat-Shamir challenge, and reduced to a
// single-vector lookup plus a permutation proof.
type ProofLookupTables struct {

	// commitments to the rows of f
	fs []kzg.Digest

	// commitments to the rows of t
	ts []kzg.Digest

	// lookup proof for the folded f and the folded t
	foldedProof ProofLookupVector

	// proof that the folded ts correspond to t in the folded proof
	permutationProof permutation.Proof
}
+func ProveLookupTables(srs *kzg.SRS, f, t []Table) (ProofLookupTables, error) { + + // res + proof := ProofLookupTables{} + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check the sizes + if len(f) != len(t) { + return proof, ErrIncompatibleSize + } + s := len(f[0]) + for i := 1; i < len(f); i++ { + if len(f[i]) != s { + return proof, ErrIncompatibleSize + } + } + s = len(t[0]) + for i := 1; i < len(t); i++ { + if len(t[i]) != s { + return proof, ErrIncompatibleSize + } + } + + // commit to the tables in f and t + nbRows := len(t) + proof.fs = make([]kzg.Digest, nbRows) + proof.ts = make([]kzg.Digest, nbRows) + _nbColumns := len(f[0]) + 1 + if _nbColumns < len(t[0]) { + _nbColumns = len(t[0]) + } + d := fft.NewDomain(uint64(_nbColumns), 0, false) + nbColumns := d.Cardinality + lfs := make([][]fr.Element, nbRows) + cfs := make([][]fr.Element, nbRows) + lts := make([][]fr.Element, nbRows) + cts := make([][]fr.Element, nbRows) + + for i := 0; i < nbRows; i++ { + + cfs[i] = make([]fr.Element, nbColumns) + lfs[i] = make([]fr.Element, nbColumns) + copy(cfs[i], f[i]) + copy(lfs[i], f[i]) + for j := len(f[i]); j < int(nbColumns); j++ { + cfs[i][j] = f[i][len(f[i])-1] + lfs[i][j] = f[i][len(f[i])-1] + } + d.FFTInverse(cfs[i], fft.DIF, 0) + fft.BitReverse(cfs[i]) + proof.fs[i], err = kzg.Commit(cfs[i], srs) + if err != nil { + return proof, err + } + + cts[i] = make([]fr.Element, nbColumns) + lts[i] = make([]fr.Element, nbColumns) + copy(cts[i], t[i]) + copy(lts[i], t[i]) + for j := len(t[i]); j < int(d.Cardinality); j++ { + cts[i][j] = t[i][len(t[i])-1] + lts[i][j] = t[i][len(t[i])-1] + } + d.FFTInverse(cts[i], fft.DIF, 0) + fft.BitReverse(cts[i]) + proof.ts[i], err = kzg.Commit(cts[i], srs) + if err != nil { + return proof, err + } + } + + // fold f and t + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = 
new(kzg.Digest) + comms[i].Set(&proof.fs[i]) + comms[nbRows+i] = new(kzg.Digest) + comms[nbRows+i].Set(&proof.ts[i]) + } + lambda, err := deriveRandomness(&fs, "lambda", comms...) + if err != nil { + return proof, err + } + foldedf := make(Table, nbColumns) + foldedt := make(Table, nbColumns) + for i := 0; i < int(nbColumns); i++ { + for j := nbRows - 1; j >= 0; j-- { + foldedf[i].Mul(&foldedf[i], &lambda). + Add(&foldedf[i], &lfs[j][i]) + foldedt[i].Mul(&foldedt[i], &lambda). + Add(&foldedt[i], <s[j][i]) + } + } + + // generate a proof of permutation of the foldedt and sort(foldedt) + foldedtSorted := make(Table, nbColumns) + copy(foldedtSorted, foldedt) + sort.Sort(foldedtSorted) + proof.permutationProof, err = permutation.Prove(srs, foldedt, foldedtSorted) + if err != nil { + return proof, err + } + + // call plookupVector, on foldedf[:len(foldedf)-1] to ensure that the domain size + // in ProveLookupVector is the same as d's + proof.foldedProof, err = ProveLookupVector(srs, foldedf[:len(foldedf)-1], foldedt) + + return proof, err +} + +// VerifyLookupTables verifies that a ProofLookupTables proof is correct. +func VerifyLookupTables(srs *kzg.SRS, proof ProofLookupTables) error { + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check that the number of digests is the same + if len(proof.fs) != len(proof.ts) { + return ErrNumberDigests + } + + // fold the commitments fs and ts + nbRows := len(proof.fs) + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = &proof.fs[i] + comms[i+nbRows] = &proof.ts[i] + } + lambda, err := deriveRandomness(&fs, "lambda", comms...) 
+ if err != nil { + return err + } + + // fold the commitments of the rows of t and f + var comf, comt kzg.Digest + comf.Set(&proof.fs[nbRows-1]) + comt.Set(&proof.ts[nbRows-1]) + var blambda big.Int + lambda.ToBigIntRegular(&blambda) + for i := nbRows - 2; i >= 0; i-- { + comf.ScalarMultiplication(&comf, &blambda). + Add(&comf, &proof.fs[i]) + comt.ScalarMultiplication(&comt, &blambda). + Add(&comt, &proof.ts[i]) + } + + // check that the folded commitment of the fs correspond to foldedProof.f + if !comf.Equal(&proof.foldedProof.f) { + return ErrFoldedCommitment + } + + // check that the folded commitment of the ts is a permutation of proof.FoldedProof.t + err = permutation.Verify(srs, proof.permutationProof) + if err != nil { + return err + } + + // verify the inner proof + return VerifyLookupVector(srs, proof.foldedProof) +} + +// TODO put that in fiat-shamir package +func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bls12378.G1Affine) (fr.Element, error) { + + var buf [bls12378.SizeOfG1AffineUncompressed]byte + var r fr.Element + + for _, p := range points { + buf = p.RawBytes() + if err := fs.Bind(challenge, buf[:]); err != nil { + return r, err + } + } + + b, err := fs.ComputeChallenge(challenge) + if err != nil { + return r, err + } + r.SetBytes(b) + return r, nil +} diff --git a/ecc/bls12-378/fr/plookup/vector.go b/ecc/bls12-378/fr/plookup/vector.go new file mode 100644 index 000000000..81b8c536b --- /dev/null +++ b/ecc/bls12-378/fr/plookup/vector.go @@ -0,0 +1,687 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "math/bits" + "sort" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrNotInTable = errors.New("some value in the vector is not in the lookup table") + ErrPlookupVerification = errors.New("plookup verification failed") +) + +type Table []fr.Element + +// Len is the number of elements in the collection. +func (t Table) Len() int { + return len(t) +} + +// Less reports whether the element with +// index i should sort before the element with index j. +func (t Table) Less(i, j int) bool { + return t[i].Cmp(&t[j]) == -1 +} + +// Swap swaps the elements with indexes i and j. +func (t Table) Swap(i, j int) { + t[i], t[j] = t[j], t[i] +} + +// Proof Plookup proof, containing opening proofs +type ProofLookupVector struct { + + // size of the system + size uint64 + + // Commitments to h1, h2, t, z, f, h + h1, h2, t, z, f, h kzg.Digest + + // Batch opening proof of h1, h2, z, t + BatchedProof kzg.BatchOpeningProof + + // Batch opening proof of h1, h2, z shifted by g + BatchedProofShifted kzg.BatchOpeningProof +} + +// computeZ computes Z, in Lagrange basis. 
Z is the accumulation of the partial +// ratios of 2 fully split polynomials (cf https://eprint.iacr.org/2020/315.pdf) +// * lf is the list of values that should be in lt +// * lt is the lookup table +// * lh1, lh2 is lf sorted by lt split in 2 overlapping slices +// * beta, gamma are challenges (Schwartz-zippel: they are the random evaluations point) +func computeZ(lf, lt, lh1, lh2 []fr.Element, beta, gamma fr.Element) []fr.Element { + + z := make([]fr.Element, len(lt)) + + n := len(lt) + d := make([]fr.Element, n-1) + var u, c fr.Element + c.SetOne(). + Add(&c, &beta). + Mul(&c, &gamma) + for i := 0; i < n-1; i++ { + + d[i].Mul(&beta, &lh1[i+1]). + Add(&d[i], &lh1[i]). + Add(&d[i], &c) + + u.Mul(&beta, &lh2[i+1]). + Add(&u, &lh2[i]). + Add(&u, &c) + + d[i].Mul(&d[i], &u) + } + d = fr.BatchInvert(d) + + z[0].SetOne() + var a, b, e fr.Element + e.SetOne().Add(&e, &beta) + for i := 0; i < n-1; i++ { + + a.Add(&gamma, &lf[i]) + + b.Mul(&beta, <[i+1]). + Add(&b, <[i]). + Add(&b, &c) + + a.Mul(&a, &b). + Mul(&a, &e) + + z[i+1].Mul(&z[i], &a). + Mul(&z[i+1], &d[i]) + } + + return z +} + +// computeH computes the evaluation (shifted, bit reversed) of h where +// h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) - +// (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) ) +// +// * cz, ch1, ch2, ct, cf are the polynomials z, h1, h2, t, f in canonical basis +// * _lz, _lh1, _lh2, _lt, _lf are the polynomials z, h1, h2, t, f in shifted Lagrange basis (domainH) +// * beta, gamma are the challenges +// * it returns h in canonical basis +func computeH(_lz, _lh1, _lh2, _lt, _lf []fr.Element, beta, gamma fr.Element, domainH *fft.Domain) []fr.Element { + + // result + s := int(domainH.Cardinality) + num := make([]fr.Element, domainH.Cardinality) + + var u, v, w, _g, m, n, one, t fr.Element + t.SetUint64(2). + Inverse(&t) + _g.Square(&domainH.Generator). 
+ Exp(_g, big.NewInt(int64(s/2-1))) + one.SetOne() + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + g := make([]fr.Element, s) + g[0].Set(&domainH.FinerGenerator) + for i := 1; i < s; i++ { + g[i].Mul(&g[i-1], &domainH.Generator) + } + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + // m = (x-g**(n-1))*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) + m.Mul(&v, &_lz[_i]) + u.Add(&gamma, &_lf[_i]) + m.Mul(&m, &u) + u.Mul(&beta, &_lt[_is]). + Add(&u, &_lt[_i]). + Add(&u, &w) + m.Mul(&m, &u) + + // n = (x-g**(n-1))*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) + n.Mul(&beta, &_lh1[_is]). + Add(&n, &_lh1[_i]). + Add(&n, &w) + u.Mul(&beta, &_lh2[_is]). + Add(&u, &_lh2[_i]). + Add(&u, &w) + n.Mul(&n, &u). + Mul(&n, &_lz[_is]) + + num[_i].Sub(&m, &n) + u.Sub(&g[i], &_g) + num[_i].Mul(&num[_i], &u) + + } + + return num +} + +// computeH0 returns l0 * (z-1), in Lagrange basis and bit reversed order +func computeH0(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var d fr.Element + d.Set(&domainH.FinerGenerator) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(den); i++ { + den[i].Sub(&d, &one) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). 
+ Mul(&res[_i], &g[i%2]).Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHn returns ln * (z-1), in Lagrange basis and bit reversed order +func computeHn(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + one.SetOne() + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(lzCosetReversed); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). + Mul(&res[_i], &g[i%2]). 
+ Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHh1h2 returns ln * (h1 - h2(g.x)), in Lagrange basis and bit reversed order +func computeHh1h2(_lh1, _lh2 []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(_lh1)) + for i := 0; i < len(_lh1); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(_lh1)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + s := len(_lh1) + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + res[_i].Sub(&_lh1[_i], &_lh2[_is]). + Mul(&res[_i], &g[i%2]). + Mul(&res[_i], &den[i]) + } + + return res +} + +// computeQuotient computes the full quotient of the plookup protocol. +// * alpha is the challenge to fold the numerator +// * lh, lh0, lhn, lh1h2 are the various pieces of the numerator (Lagrange shifted form, bit reversed order) +// * domainH fft domain +// It returns the quotient, in canonical basis +func computeQuotient(alpha fr.Element, lh, lh0, lhn, lh1h2 []fr.Element, domainH *fft.Domain) []fr.Element { + + s := len(lh) + res := make([]fr.Element, s) + + var one fr.Element + one.SetOne() + + var d [2]fr.Element + d[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality>>1))) + d[1].Neg(&d[0]) + d[0].Sub(&d[0], &one).Inverse(&d[0]) + d[1].Sub(&d[1], &one).Inverse(&d[1]) + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + + res[_i].Mul(&lh1h2[_i], &alpha). + Add(&res[_i], &lhn[_i]). 
+ Mul(&res[_i], &alpha). + Add(&res[_i], &lh0[_i]). + Mul(&res[_i], &alpha). + Add(&res[_i], &lh[_i]). + Mul(&res[_i], &d[i%2]) + } + + domainH.FFTInverse(res, fft.DIT, 1) + + return res +} + +// ProveLookupVector returns proof that the values in f are in t. +// +// /!\IMPORTANT/!\ +// +// If the table t is already commited somewhere (which is the normal workflow +// before generating a lookup proof), the commitment needs to be done on the +// table sorted. Otherwise the commitment in proof.t will not be the same as +// the public commitment: it will contain the same values, but permuted. +// +func ProveLookupVector(srs *kzg.SRS, f, t Table) (ProofLookupVector, error) { + + // res + var proof ProofLookupVector + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // create domains + var dNum *fft.Domain + if len(t) <= len(f) { + dNum = fft.NewDomain(uint64(len(f)+1), 0, false) + } else { + dNum = fft.NewDomain(uint64(len(t)), 0, false) + } + cardDNum := int(dNum.Cardinality) + + // set the size + proof.size = dNum.Cardinality + + // resize f and t + // note: the last element of lf does not matter + lf := make([]fr.Element, cardDNum) + lt := make([]fr.Element, cardDNum) + cf := make([]fr.Element, cardDNum) + ct := make([]fr.Element, cardDNum) + copy(lt, t) + copy(lf, f) + for i := len(f); i < cardDNum; i++ { + lf[i] = f[len(f)-1] + } + for i := len(t); i < cardDNum; i++ { + lt[i] = t[len(t)-1] + } + sort.Sort(Table(lt)) + copy(ct, lt) + copy(cf, lf) + dNum.FFTInverse(ct, fft.DIF, 0) + dNum.FFTInverse(cf, fft.DIF, 0) + fft.BitReverse(ct) + fft.BitReverse(cf) + proof.t, err = kzg.Commit(ct, srs) + if err != nil { + return proof, err + } + proof.f, err = kzg.Commit(cf, srs) + if err != nil { + return proof, err + } + + // write f sorted by t + lfSortedByt := make(Table, 2*dNum.Cardinality-1) + copy(lfSortedByt, lt) + 
copy(lfSortedByt[dNum.Cardinality:], lf) + sort.Sort(lfSortedByt) + + // compute h1, h2, commit to them + lh1 := make([]fr.Element, cardDNum) + lh2 := make([]fr.Element, cardDNum) + ch1 := make([]fr.Element, cardDNum) + ch2 := make([]fr.Element, cardDNum) + copy(lh1, lfSortedByt[:cardDNum]) + copy(lh2, lfSortedByt[cardDNum-1:]) + + copy(ch1, lfSortedByt[:cardDNum]) + copy(ch2, lfSortedByt[cardDNum-1:]) + dNum.FFTInverse(ch1, fft.DIF, 0) + dNum.FFTInverse(ch2, fft.DIF, 0) + fft.BitReverse(ch1) + fft.BitReverse(ch2) + + proof.h1, err = kzg.Commit(ch1, srs) + if err != nil { + return proof, err + } + proof.h2, err = kzg.Commit(ch2, srs) + if err != nil { + return proof, err + } + + // derive beta, gamma + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return proof, err + } + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return proof, err + } + + // Compute to Z + lz := computeZ(lf, lt, lh1, lh2, beta, gamma) + cz := make([]fr.Element, len(lz)) + copy(cz, lz) + dNum.FFTInverse(cz, fft.DIF, 0) + fft.BitReverse(cz) + proof.z, err = kzg.Commit(cz, srs) + if err != nil { + return proof, err + } + + // prepare data for computing the quotient + // compute the numerator + s := dNum.Cardinality + domainH := fft.NewDomain(uint64(2*s), 1, false) + _lz := make([]fr.Element, 2*s) + _lh1 := make([]fr.Element, 2*s) + _lh2 := make([]fr.Element, 2*s) + _lt := make([]fr.Element, 2*s) + _lf := make([]fr.Element, 2*s) + copy(_lz, cz) + copy(_lh1, ch1) + copy(_lh2, ch2) + copy(_lt, ct) + copy(_lf, cf) + domainH.FFT(_lz, fft.DIF, 1) + domainH.FFT(_lh1, fft.DIF, 1) + domainH.FFT(_lh2, fft.DIF, 1) + domainH.FFT(_lt, fft.DIF, 1) + domainH.FFT(_lf, fft.DIF, 1) + + // compute h + lh := computeH(_lz, _lh1, _lh2, _lt, _lf, beta, gamma, domainH) + + // compute h0 + lh0 := computeH0(_lz, domainH) + + // compute hn + lhn := computeHn(_lz, domainH) + + // compute hh1h2 + lh1h2 := computeHh1h2(_lh1, _lh2, domainH) + + // 
compute the quotient + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return proof, err + } + ch := computeQuotient(alpha, lh, lh0, lhn, lh1h2, domainH) + proof.h, err = kzg.Commit(ch, srs) + if err != nil { + return proof, err + } + + // build the opening proofs + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return proof, err + } + proof.BatchedProof, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + cf, + ch, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + nu.Mul(&nu, &dNum.Generator) + proof.BatchedProofShifted, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + return proof, nil +} + +// VerifyLookupVector verifies that a ProofLookupVector proof is correct +func VerifyLookupVector(srs *kzg.SRS, proof ProofLookupVector) error { + + // hash function that is used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // derive the various challenges + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return err + } + + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return err + } + + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return err + } + + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return err + } + + // check opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &proof.BatchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = 
kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &proof.BatchedProofShifted, + hFunc, + srs, + ) + if err != nil { + return err + } + + // check polynomial relation using Schwartz Zippel + var lhs, rhs, nun, g, _g, a, v, w, one fr.Element + d := fft.NewDomain(proof.size, 0, false) // only there to access to root of 1... + one.SetOne() + g.Exp(d.Generator, big.NewInt(int64(d.Cardinality-1))) + + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + // h(nu) where + // h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) - + // (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) ) + lhs.Sub(&nu, &g). + Mul(&lhs, &proof.BatchedProof.ClaimedValues[3]). + Mul(&lhs, &v) + a.Add(&gamma, &proof.BatchedProof.ClaimedValues[4]) + lhs.Mul(&lhs, &a) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[2]). + Add(&a, &proof.BatchedProof.ClaimedValues[2]). + Add(&a, &w) + lhs.Mul(&lhs, &a) + + rhs.Sub(&nu, &g). + Mul(&rhs, &proof.BatchedProofShifted.ClaimedValues[3]) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[0]). + Add(&a, &proof.BatchedProof.ClaimedValues[0]). + Add(&a, &w) + rhs.Mul(&rhs, &a) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[1]). + Add(&a, &proof.BatchedProof.ClaimedValues[1]). + Add(&a, &w) + rhs.Mul(&rhs, &a) + + lhs.Sub(&lhs, &rhs) + + // check consistancy of bounds + var l0, ln, d1, d2 fr.Element + l0.Exp(nu, big.NewInt(int64(d.Cardinality))).Sub(&l0, &one) + ln.Set(&l0) + d1.Sub(&nu, &one) + d2.Sub(&nu, &g) + l0.Div(&l0, &d1) + ln.Div(&ln, &d2) + + // l0*(z-1) + var l0z fr.Element + l0z.Sub(&proof.BatchedProof.ClaimedValues[3], &one). + Mul(&l0z, &l0) + + // ln*(z-1) + var lnz fr.Element + lnz.Sub(&proof.BatchedProof.ClaimedValues[3], &one). + Mul(&ln, &lnz) + + // ln*(h1 - h2(g.x)) + var lnh1h2 fr.Element + lnh1h2.Sub(&proof.BatchedProof.ClaimedValues[0], &proof.BatchedProofShifted.ClaimedValues[1]). 
+ Mul(&lnh1h2, &ln) + + // fold the numerator + lnh1h2.Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lnz). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &l0z). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lhs) + + // (x**n-1) * h(x) evaluated at nu + nun.Exp(nu, big.NewInt(int64(d.Cardinality))) + _g.Sub(&nun, &one) + _g.Mul(&proof.BatchedProof.ClaimedValues[5], &_g) + if !lnh1h2.Equal(&_g) { + return ErrPlookupVerification + } + + return nil +} diff --git a/ecc/bls12-378/fr/polynomial/doc.go b/ecc/bls12-378/fr/polynomial/doc.go new file mode 100644 index 000000000..83479b058 --- /dev/null +++ b/ecc/bls12-378/fr/polynomial/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package polynomial provides polynomial methods and commitment schemes. +package polynomial diff --git a/ecc/bls12-378/fr/polynomial/polynomial.go b/ecc/bls12-378/fr/polynomial/polynomial.go new file mode 100644 index 000000000..27b5e17d1 --- /dev/null +++ b/ecc/bls12-378/fr/polynomial/polynomial.go @@ -0,0 +1,123 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// Polynomial polynomial represented by coefficients bn254 fr field. +type Polynomial []fr.Element + +// Degree returns the degree of the polynomial, which is the length of Data. +func (p *Polynomial) Degree() uint64 { + return uint64(len(*p) - 1) +} + +// Eval evaluates p at v +// returns a fr.Element +func (p *Polynomial) Eval(v *fr.Element) fr.Element { + + res := (*p)[len(*p)-1] + for i := len(*p) - 2; i >= 0; i-- { + res.Mul(&res, v) + res.Add(&res, &(*p)[i]) + } + + return res +} + +// Clone returns a copy of the polynomial +func (p *Polynomial) Clone() Polynomial { + _p := make(Polynomial, len(*p)) + copy(_p, *p) + return _p +} + +// AddConstantInPlace adds a constant to the polynomial, modifying p +func (p *Polynomial) AddConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Add(&(*p)[i], c) + } +} + +// SubConstantInPlace subs a constant to the polynomial, modifying p +func (p *Polynomial) SubConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Sub(&(*p)[i], c) + } +} + +// ScaleInPlace multiplies p by v, modifying p +func (p *Polynomial) ScaleInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Mul(&(*p)[i], c) + } +} + +// Add adds p1 to p2 +// This function allocates a new slice unless p == p1 or p == p2 +func (p *Polynomial) Add(p1, p2 Polynomial) *Polynomial { + + bigger := p1 + smaller := p2 + if len(bigger) < len(smaller) 
{ + bigger, smaller = smaller, bigger + } + + if len(*p) == len(bigger) && (&(*p)[0] == &bigger[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &smaller[i]) + } + return p + } + + if len(*p) == len(smaller) && (&(*p)[0] == &smaller[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &bigger[i]) + } + *p = append(*p, bigger[len(smaller):]...) + return p + } + + res := make(Polynomial, len(bigger)) + copy(res, bigger) + for i := 0; i < len(smaller); i++ { + res[i].Add(&res[i], &smaller[i]) + } + *p = res + return p +} + +// Equal checks equality between two polynomials +func (p *Polynomial) Equal(p1 Polynomial) bool { + if (*p == nil) != (p1 == nil) { + return false + } + + if len(*p) != len(p1) { + return false + } + + for i := range p1 { + if !(*p)[i].Equal(&p1[i]) { + return false + } + } + + return true +} diff --git a/ecc/bls12-378/fr/polynomial/polynomial_test.go b/ecc/bls12-378/fr/polynomial/polynomial_test.go new file mode 100644 index 000000000..73994acd5 --- /dev/null +++ b/ecc/bls12-378/fr/polynomial/polynomial_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +func TestPolynomialEval(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // random value + var point fr.Element + point.SetRandom() + + // compute manually f(val) + var expectedEval, one, den fr.Element + var expo big.Int + one.SetOne() + expo.SetUint64(20) + expectedEval.Exp(point, &expo). + Sub(&expectedEval, &one) + den.Sub(&point, &one) + expectedEval.Div(&expectedEval, &den) + + // compute purported evaluation + purportedEval := f.Eval(&point) + + // check + if !purportedEval.Equal(&expectedEval) { + t.Fatal("polynomial evaluation failed") + } +} + +func TestPolynomialAddConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to add + var c fr.Element + c.SetRandom() + + // add constant + f.AddConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Add(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("AddConstantInPlace failed") + } + } +} + +func TestPolynomialSubConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to sub + var c fr.Element + c.SetRandom() + + // sub constant + f.SubConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Sub(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("SubConstantInPlace failed") + } + } +} + +func TestPolynomialScaleInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to scale by + var c fr.Element + c.SetRandom() + + // scale by constant + 
f.ScaleInPlace(&c) + + // check + for i := 0; i < 20; i++ { + if !f[i].Equal(&c) { + t.Fatal("ScaleInPlace failed") + } + } + +} + +func TestPolynomialAdd(t *testing.T) { + + // build unbalanced polynomials + f1 := make(Polynomial, 20) + f1Backup := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f1[i].SetOne() + f1Backup[i].SetOne() + } + f2 := make(Polynomial, 10) + f2Backup := make(Polynomial, 10) + for i := 0; i < 10; i++ { + f2[i].SetOne() + f2Backup[i].SetOne() + } + + // expected result + var one, two fr.Element + one.SetOne() + two.Double(&one) + expectedSum := make(Polynomial, 20) + for i := 0; i < 10; i++ { + expectedSum[i].Set(&two) + } + for i := 10; i < 20; i++ { + expectedSum[i].Set(&one) + } + + // caller is empty + var g Polynomial + g.Add(f1, f2) + if !g.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // all operands are distincts + _f1 := f1.Clone() + _f1.Add(f1, f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // first operand = caller + _f1 = f1.Clone() + _f2 := f2.Clone() + _f1.Add(_f1, _f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } + + // second operand = caller + _f1 = f1.Clone() + _f2 = f2.Clone() + _f1.Add(_f2, _f1) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } +} diff --git a/ecc/bls12-378/fuzz.go b/ecc/bls12-378/fuzz.go new file mode 100644 index 000000000..ad1184831 --- /dev/null +++ 
b/ecc/bls12-378/fuzz.go @@ -0,0 +1,76 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/mimc" + "math/big" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + // TODO separate in multiple FuzzXXX and update continuous fuzzer scripts + // else, we don't really benefits for fuzzer strategy. 
+ fr.Fuzz(data) + fp.Fuzz(data) + mimc.Fuzz(data) + + // fuzz pairing + r := bytes.NewReader(data) + var e1, e2 fr.Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + var r, r1, r2, r1r2, zero GT + var b1, b2, b1b2 big.Int + e1.ToBigIntRegular(&b1) + e2.ToBigIntRegular(&b2) + b1b2.Mul(&b1, &b2) + + var p1 G1Affine + var p2 G2Affine + + p1.ScalarMultiplication(&g1GenAff, &b1) + p2.ScalarMultiplication(&g2GenAff, &b2) + + r, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + r1, _ = Pair([]G1Affine{p1}, []G2Affine{g2GenAff}) + r2, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{p2}) + + r1r2.Exp(&r, b1b2) + r1.Exp(&r1, b2) + r2.Exp(&r2, b1) + + if !(r1r2.Equal(&r1) && r1r2.Equal(&r2) && !r.Equal(&zero)) { + panic("pairing bilinearity check failed") + } + } + + return fuzzNormal +} diff --git a/ecc/bls12-378/fuzz_test.go b/ecc/bls12-378/fuzz_test.go new file mode 100644 index 000000000..128cc1196 --- /dev/null +++ b/ecc/bls12-378/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bls12-378/g1.go b/ecc/bls12-378/g1.go new file mode 100644 index 000000000..ed417dd6b --- /dev/null +++ b/ecc/bls12-378/g1.go @@ -0,0 +1,964 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G1Affine point in affine coordinates +type G1Affine struct { + X, Y fp.Element +} + +// G1Jac is a point with fp.Element coordinates +type G1Jac struct { + X, Y, Z fp.Element +} + +// g1JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g1JacExtended struct { + X, Y, ZZ, ZZZ fp.Element +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G1Affine) Set(a *G1Affine) *G1Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G1Affine) ScalarMultiplication(a *G1Affine, s *big.Int) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Add(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
// This should rarely be used as it is very inefficient compared to Jacobian
:= G1Affine{} + _p.FromJacobian(p) + + _a := G1Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G1Jac) Neg(a *G1Jac) *G1Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G1Jac) SubAssign(a *G1Jac) *G1Jac { + var tmp G1Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G1Jac) AddAssign(a *G1Jac) *G1Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fp.Element + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G1Jac) AddMixed(a *G1Affine) *G1Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fp.Element + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) Double(q *G1Jac) *G1Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) DoubleAssign() *G1Jac { + + var XX, YY, YYYY, ZZ, S, M, T fp.Element + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+		Mul(&p.Y, &M)
+	YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY)
+	p.Y.Sub(&p.Y, &YYYY)
+
+	return p
+}
+
+// ScalarMultiplication computes and returns p = a*s
+// see https://www.iacr.org/archive/crypto2001/21390189.pdf
+func (p *G1Jac) ScalarMultiplication(a *G1Jac, s *big.Int) *G1Jac {
+	return p.mulGLV(a, s)
+}
+
+func (p *G1Jac) String() string {
+	if p.Z.IsZero() {
+		return "O"
+	}
+	_p := G1Affine{}
+	_p.FromJacobian(p)
+	return "E([" + _p.X.String() + "," + _p.Y.String() + "]),"
+}
+
+// FromAffine sets p = Q, p in Jacobian, Q in affine
+func (p *G1Jac) FromAffine(Q *G1Affine) *G1Jac {
+	if Q.X.IsZero() && Q.Y.IsZero() {
+		p.Z.SetZero()
+		p.X.SetOne()
+		p.Y.SetOne()
+		return p
+	}
+	p.Z.SetOne()
+	p.X.Set(&Q.X)
+	p.Y.Set(&Q.Y)
+	return p
+}
+
+// IsOnCurve returns true if p is on the curve
+func (p *G1Jac) IsOnCurve() bool {
+	var left, right, tmp fp.Element
+	left.Square(&p.Y)
+	right.Square(&p.X).Mul(&right, &p.X)
+	tmp.Square(&p.Z).
+		Square(&tmp).
+		Mul(&tmp, &p.Z).
+		Mul(&tmp, &p.Z).
+		Mul(&tmp, &bCurveCoeff)
+	right.Add(&right, &tmp)
+	return left.Equal(&right)
+}
+
+// IsInSubGroup returns true if p is on the r-torsion, false otherwise.
+// Z[r,0]+Z[-lambda, 1] is the kernel
+// of (u,v)->u+lambda*v mod r. Expressing r, lambda as
+// polynomials in x, a short vector of this Zmodule is
+// 1, x**2. So we check that p+x**2*phi(p)
+// is the infinity.
+func (p *G1Jac) IsInSubGroup() bool {
+
+	var res G1Jac
+	res.phi(p).
+		ScalarMultiplication(&res, &xGen).
+		ScalarMultiplication(&res, &xGen).
+ AddAssign(p) + + return res.IsOnCurve() && res.Z.IsZero() + +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G1Jac) mulWindowed(a *G1Jac, s *big.Int) *G1Jac { + + var res G1Jac + var ops [3]G1Jac + + res.Set(&g1Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G1Jac) phi(a *G1Jac) *G1Jac { + p.Set(a) + p.X.Mul(&p.X, &thirdRootOneG1) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { + + var table [15]G1Jac + var res G1Jac + var k1, k2 fr.Element + + res.Set(&g1Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + 
table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G1Affine) ClearCofactor(a *G1Affine) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in E(Fp) to E(Fp)[r] +func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { + // cf https://eprint.iacr.org/2019/403.pdf, 5 + var res G1Jac + res.ScalarMultiplication(a, &xGen).Neg(&res).AddAssign(a) + p.Set(&res) + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g1JacExtended) Set(a *g1JacExtended) *g1JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g1JacExtended) setInfinity() *g1JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G1Affine) fromJacExtended(Q *g1JacExtended) *G1Affine { + if Q.ZZ.IsZero() { + p.X = fp.Element{} + p.Y = fp.Element{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G1Jac) fromJacExtended(Q *g1JacExtended) *G1Jac { + if Q.ZZ.IsZero() { + p.Set(&g1Infinity) + return p + } + p.X.Mul(&Q.ZZ, 
&Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G1Jac) unsafeFromJacExtended(Q *g1JacExtended) *G1Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g1JacExtended) add(q *g1JacExtended) *g1JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fp.Element + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if B.IsZero() { + return p.double(q) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var U1, U2, S1, S2, P, R, PP, PPP, Q, V fp.Element + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) double(q *g1JacExtended) *g1JacExtended { + var U, V, W, S, XX, M fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) subMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) addMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p 
+ +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g1JacExtended) doubleNegMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) doubleMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// BatchJacobianToAffineG1 converts points in Jacobian coordinates to Affine coordinates +// performing a single field inversion (Montgomery batch inversion trick) +// result must be allocated with len(result) == len(points) +func BatchJacobianToAffineG1(points []G1Jac, result []G1Affine) { + zeroes := make([]bool, len(points)) + accumulator := fp.One() + + // batch invert all points[].Z coordinates with Montgomery batch inversion trick + // (stores points[].Z^-1 in result[i].X to avoid allocating a slice of fr.Elements) + for i := 0; i < len(points); i++ { + if points[i].Z.IsZero() { + zeroes[i] = true + continue + } + result[i].X = accumulator + accumulator.Mul(&accumulator, &points[i].Z) + } + + var accInverse fp.Element + accInverse.Inverse(&accumulator) + + for i := len(points) - 1; i >= 0; i-- { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. 
+ continue + } + result[i].X.Mul(&result[i].X, &accInverse) + accInverse.Mul(&accInverse, &points[i].Z) + } + + // batch convert to affine. + parallel.Execute(len(points), func(start, end int) { + for i := start; i < end; i++ { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + var a, b fp.Element + a = result[i].X + b.Square(&a) + result[i].X.Mul(&points[i].X, &b) + result[i].Y.Mul(&points[i].Y, &b). + Mul(&result[i].Y, &a) + } + }) + +} + +// BatchScalarMultiplicationG1 multiplies the same base (generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G1Jac, (1 << (c - 1))) + baseTable[0].Set(&g1Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; 
chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + // convert our base exp table into affine to use AddMixed + baseTableAff := make([]G1Affine, (1 << (c - 1))) + BatchJacobianToAffineG1(baseTable, baseTableAff) + toReturn := make([]G1Jac, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. + parallel.Execute(len(pScalars), func(start, end int) { + var p G1Jac + for i := start; i < end; i++ { + p.Set(&g1Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddMixed(&baseTableAff[bits-1]) + } else { + // sub + t := baseTableAff[bits & ^msbWindow] + t.Neg(&t) + p.AddMixed(&t) + } + } + + // set our result point + toReturn[i] = p + + } + }) + toReturnAff := make([]G1Affine, len(scalars)) + BatchJacobianToAffineG1(toReturn, toReturnAff) + return toReturnAff +} diff --git a/ecc/bls12-378/g1_test.go b/ecc/bls12-378/g1_test.go new file mode 100644 index 000000000..13346156b --- /dev/null +++ b/ecc/bls12-378/g1_test.go @@ -0,0 +1,666 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG1AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fp.Element) bool { + var p, res1, res2 G1Jac + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fp.Element) bool { + var p, res, tmp G1Jac + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Svsw mapping should output point on the curve", prop.ForAll( + func(a fp.Element) bool { + g := MapToCurveG1Svdw(a) + return g.IsInSubGroup() + }, + GenFp(), + )) + + properties.Property("[G1] Svsw mapping should be deterministic", prop.ForAll( + func(a fp.Element) bool { + g1 := MapToCurveG1Svdw(a) + g2 := MapToCurveG1Svdw(a) + return g1.Equal(&g2) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] g1Gen (affine) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G1Affine + op1.FromJacobian(&g1Gen) + op2.FromJacobian(&g1Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenFp(), + )) + + properties.Property("[BLS12-378] g1Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2, op3 G1Jac + op1.Set(&g1Gen) + op3.Set(&g1Gen) + + op2 = fuzzJacobianG1Affine(&g1Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + g := 
fuzzJacobianG1Affine(&g1Gen, a) + var op1 G1Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + var g g1JacExtended + g.X.Set(&g1Gen.X) + g.Y.Set(&g1Gen.Y) + g.ZZ.Set(&g1Gen.Z) + g.ZZZ.Set(&g1Gen.Z) + gfuzz := fuzzExtendedJacobianG1Affine(&g, a) + + var op1 G1Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fp.Element) bool { + var g G1Jac + var op1 G1Affine + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + + var one fp.Element + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g1Gen.X) && g.Y.Equal(&g1Gen.Y) && g.Z.Equal(&one) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G1Jac + op1.FromAffine(&g) + var one, zero fp.Element + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G1Affine + var op1 g1JacExtended + var zero fp.Element + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Jac + var op1 g1JacExtended + var zero, one fp.Element + one.SetOne() + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) 
+ return g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fp.Element) bool { + op1 := fuzzJacobianG1Affine(&g1Gen, a) + op2 := fuzzJacobianG1Affine(&g1Gen, b) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BLS12-378] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + var op1, op2 G1Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.AddAssign(&g1Infinity) + var op2 G1Jac + op2.Set(&g1Infinity) + op2.AddAssign(&g1Gen) + return fop1.Equal(&g1Gen) && op2.Equal(&g1Gen) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + 
o1.addMixed(&p1Neg) + o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.Neg(&fop1) + var op2 G1Affine + op2.FromJacobian(&g1Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g1Gen, &rminusone) + gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g1Gen, &scalar) + op2.mulWindowed(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BLS12-378] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + 
op3.ScalarMultiplication(&g1Gen, &rminusone) + gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g1Gen, &scalar) + op2.ScalarMultiplication(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BLS12-378] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G1Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g1Gen, &r) + op2.mulGLV(&g1Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g1Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fp.Element + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + for x.Legendre() != 1 { + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + } + + b.Sqrt(&x) + var point, pointCleared, infinity G1Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g1Infinity) + return point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG1AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BLS12-378] BatchScalarMultiplication should be consistant with 
individual scalar multiplications", prop.ForAll( + func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G1Jac + var expected G1Affine + var b big.Int + expectedJac.mulGLV(&g1Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG1JacIsInSubGroup(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG1AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG1JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G1Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g1Gen, &scalar) + } + }) + + var glv G1Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g1Gen, &scalar) + } + }) + +} + +func BenchmarkG1AffineCofactorClearing(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG1JacAdd(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g1Gen) + } +} + +func BenchmarkG1JacAddMixed(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG1JacDouble(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG1JacExtAddMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG1JacExtSubMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + 
c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleNegMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG1JacExtAdd(b *testing.B) { + var a, c g1JacExtended + a.doubleMixed(&g1GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG1JacExtDouble(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG1Affine(p *G1Jac, f fp.Element) G1Jac { + var res G1Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG1Affine(p *g1JacExtended, f fp.Element) g1JacExtended { + var res g1JacExtended + var ff, fff fp.Element + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bls12-378/g2.go b/ecc/bls12-378/g2.go new file mode 100644 index 000000000..10bbd197f --- /dev/null +++ b/ecc/bls12-378/g2.go @@ -0,0 +1,978 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G2Affine point in affine coordinates +type G2Affine struct { + X, Y fptower.E2 +} + +// G2Jac is a point with fptower.E2 coordinates +type G2Jac struct { + X, Y, Z fptower.E2 +} + +// g2JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g2JacExtended struct { + X, Y, ZZ, ZZZ fptower.E2 +} + +// g2Proj point in projective coordinates +type g2Proj struct { + x, y, z fptower.E2 +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G2Affine) Set(a *G2Affine) *G2Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G2Affine) ScalarMultiplication(a *G2Affine, s *big.Int) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Add(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Sub(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G2Affine) Equal(a *G2Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G2Affine) Neg(a *G2Affine) *G2Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G2Affine) FromJacobian(p1 *G2Jac) *G2Affine { + + var a, b fptower.E2 + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G2Affine) String() string { + var x, y fptower.E2 + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G2Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Affine) IsOnCurve() bool { + var point G2Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G2Affine) IsInSubGroup() bool { + var _p G2Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G2Jac) Set(a *G2Jac) *G2Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G2Jac) Equal(a *G2Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G2Affine{} + _p.FromJacobian(p) + + _a := G2Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G2Jac) Neg(a *G2Jac) *G2Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G2Jac) SubAssign(a *G2Jac) *G2Jac { + var tmp G2Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G2Jac) AddAssign(a *G2Jac) *G2Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fptower.E2 + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G2Jac) AddMixed(a *G2Affine) *G2Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fptower.E2 + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) Double(q *G2Jac) *G2Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) DoubleAssign() *G2Jac { + + var XX, YY, YYYY, ZZ, S, M, T fptower.E2 + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) ScalarMultiplication(a *G2Jac, s *big.Int) *G2Jac { + return p.mulGLV(a, s) +} + +func (p *G2Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G2Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G2Jac) FromAffine(Q *G2Affine) *G2Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Jac) IsOnCurve() bool { + var left, right, tmp fptower.E2 + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bTwistCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// https://eprint.iacr.org/2021/1130.pdf, sec.4 +// psi(p) = u*P +func (p *G2Jac) IsInSubGroup() bool { + var res, tmp G2Jac + tmp.psi(p) + res.ScalarMultiplication(p, &xGen). 
+ SubAssign(&tmp) + + return res.IsOnCurve() && res.Z.IsZero() +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G2Jac) mulWindowed(a *G2Jac, s *big.Int) *G2Jac { + + var res G2Jac + var ops [3]G2Jac + + res.Set(&g2Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// psi(p) = u o frob o u**-1 where u:E'->E iso from the twist to E +func (p *G2Jac) psi(a *G2Jac) *G2Jac { + p.Set(a) + p.X.Conjugate(&p.X).Mul(&p.X, &endo.u) + p.Y.Conjugate(&p.Y).Mul(&p.Y, &endo.v) + p.Z.Conjugate(&p.Z) + return p +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G2Jac) phi(a *G2Jac) *G2Jac { + p.Set(a) + p.X.MulByElement(&p.X, &thirdRootOneG2) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { + + var table [15]G2Jac + var res G2Jac + var k1, k2 fr.Element + + res.Set(&g2Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + 
table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Affine) ClearCofactor(a *G2Affine) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { + // https://eprint.iacr.org/2017/419.pdf, 4.1 + var xg, xxg, res, t G2Jac + xg.ScalarMultiplication(a, &xGen) + xxg.ScalarMultiplication(&xg, &xGen) + + res.Set(&xxg). + SubAssign(&xg). + SubAssign(a) + + t.Set(&xg). + SubAssign(a). 
+ psi(&t) + + res.AddAssign(&t) + + t.Double(a) + t.X.MulByElement(&t.X, &thirdRootOneG1) + + res.SubAssign(&t) + + p.Set(&res) + + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g2JacExtended) Set(a *g2JacExtended) *g2JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g2JacExtended) setInfinity() *g2JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G2Affine) fromJacExtended(Q *g2JacExtended) *G2Affine { + if Q.ZZ.IsZero() { + p.X = fptower.E2{} + p.Y = fptower.E2{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G2Jac) fromJacExtended(Q *g2JacExtended) *G2Jac { + if Q.ZZ.IsZero() { + p.Set(&g2Infinity) + return p + } + p.X.Mul(&Q.ZZ, &Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G2Jac) unsafeFromJacExtended(Q *g2JacExtended) *G2Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g2JacExtended) add(q *g2JacExtended) *g2JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fptower.E2 + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if 
B.IsZero() { + return p.double(q) + + } + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p + } + + var U1, U2, S1, S2, P, R, PP, PPP, Q, V fptower.E2 + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) double(q *g2JacExtended) *g2JacExtended { + var U, V, W, S, XX, M fptower.E2 + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) subMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fptower.E2 + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fptower.E2 + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) addMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fptower.E2 + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fptower.E2 + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p 
+ +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g2JacExtended) doubleNegMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fptower.E2 + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) doubleMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fptower.E2 + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// ------------------------------------------------------------------------------------------------- +// Homogenous projective + +// Set sets p to the provided point +func (p *g2Proj) Set(a *g2Proj) *g2Proj { + p.x, p.y, p.z = a.x, a.y, a.z + return p +} + +// Neg computes -G +func (p *g2Proj) Neg(a *g2Proj) *g2Proj { + *p = *a + p.y.Neg(&a.y) + return p +} + +// FromJacobian converts a point from Jacobian to projective coordinates +func (p *g2Proj) FromJacobian(Q *G2Jac) *g2Proj { + var buf fptower.E2 + buf.Square(&Q.Z) + + p.x.Mul(&Q.X, &Q.Z) + p.y.Set(&Q.Y) + p.z.Mul(&Q.Z, &buf) + + return p +} + +// FromAffine sets p = Q, p in homogenous projective, Q in affine +func (p *g2Proj) FromAffine(Q *G2Affine) *g2Proj { + if Q.X.IsZero() && Q.Y.IsZero() { + p.z.SetZero() + p.x.SetOne() + p.y.SetOne() + return p + } + p.z.SetOne() + p.x.Set(&Q.X) + p.y.Set(&Q.Y) + return p +} + +// BatchScalarMultiplicationG2 multiplies the same base 
(generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG2(base *G2Affine, scalars []fr.Element) []G2Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G2Jac, (1 << (c - 1))) + baseTable[0].Set(&g2Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + toReturn := make([]G2Affine, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. 
+ parallel.Execute(len(pScalars), func(start, end int) { + var p G2Jac + for i := start; i < end; i++ { + p.Set(&g2Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddAssign(&baseTable[bits-1]) + } else { + // sub + t := baseTable[bits & ^msbWindow] + t.Neg(&t) + p.AddAssign(&t) + } + } + + // set our result point + toReturn[i].FromJacobian(&p) + + } + }) + return toReturn +} diff --git a/ecc/bls12-378/g2_test.go b/ecc/bls12-378/g2_test.go new file mode 100644 index 000000000..f813c2b39 --- /dev/null +++ b/ecc/bls12-378/g2_test.go @@ -0,0 +1,685 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG2AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fptower.E2) bool { + var p, res1, res2 G2Jac + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fptower.E2) bool { + var p, res, tmp G2Jac + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenE2(), + )) + + properties.Property("[BLS12-378] check that psi^2(P) = -phi(P)", prop.ForAll( + func(a fptower.E2) bool { + var p, res1, res2 G2Jac + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) + res1.psi(&p).psi(&res1).Neg(&res1) + res2.Set(&p) + res2.X.MulByElement(&res2.X, &thirdRootOneG1) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Svsw mapping should output point on the curve", prop.ForAll( + func(a fptower.E2) bool { + g := MapToCurveG2Svdw(a) + return g.IsInSubGroup() + }, + GenE2(), + )) + + properties.Property("[G2] Svsw mapping should be deterministic", prop.ForAll( + func(a fptower.E2) bool { + g1 := MapToCurveG2Svdw(a) + g2 := MapToCurveG2Svdw(a) + return g1.Equal(&g2) + }, + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] g2Gen (affine) should be on the curve", prop.ForAll( + func(a fptower.E2) bool { + var op1, op2 G2Affine + op1.FromJacobian(&g2Gen) + op2.FromJacobian(&g2Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenE2(), + )) + + properties.Property("[BLS12-378] g2Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fptower.E2) bool { + var op1, op2, op3 G2Jac + op1.Set(&g2Gen) + op3.Set(&g2Gen) + + op2 = fuzzJacobianG2Affine(&g2Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func 
TestG2AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fptower.E2) bool { + g := fuzzJacobianG2Affine(&g2Gen, a) + var op1 G2Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fptower.E2) bool { + var g g2JacExtended + g.X.Set(&g2Gen.X) + g.Y.Set(&g2Gen.Y) + g.ZZ.Set(&g2Gen.Z) + g.ZZZ.Set(&g2Gen.Z) + gfuzz := fuzzExtendedJacobianG2Affine(&g, a) + + var op1 G2Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fptower.E2) bool { + var g G2Jac + var op1 G2Affine + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + + var one fptower.E2 + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g2Gen.X) && g.Y.Equal(&g2Gen.Y) && g.Z.Equal(&one) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G2Jac + op1.FromAffine(&g) + var one, zero fptower.E2 + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G2Affine + var op1 g2JacExtended + var zero fptower.E2 + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return 
g.X.Equal(&zero) && g.Y.Equal(&zero)
		},
	))

	properties.Property("[BLS12-378] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll(
		func() bool {
			// ZZ == ZZZ == 0 encodes infinity in extended Jacobian coordinates;
			// fromJacExtended must map it to the Jacobian infinity (1, 1, 0).
			var g G2Jac
			var op1 g2JacExtended
			var zero, one fptower.E2
			one.SetOne()
			op1.X.Set(&g2Gen.X)
			op1.Y.Set(&g2Gen.Y)
			g.fromJacExtended(&op1)
			return g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero)
		},
	))

	properties.Property("[BLS12-378] [Jacobian] Two representatives of the same class should be equal", prop.ForAll(
		func(a, b fptower.E2) bool {
			op1 := fuzzJacobianG2Affine(&g2Gen, a)
			op2 := fuzzJacobianG2Affine(&g2Gen, b)
			return op1.Equal(&op2)
		},
		GenE2(),
		GenE2(),
	))

	properties.TestingRun(t, gopter.ConsoleReporter(false))
}

// TestG2AffineOps checks the G2 group law (addition, doubling, mixed addition,
// scalar multiplication) on randomized Jacobian representatives of the generator.
func TestG2AffineOps(t *testing.T) {

	parameters := gopter.DefaultTestParameters()
	parameters.MinSuccessfulTests = 10

	properties := gopter.NewProperties(parameters)

	genScalar := GenFr()

	properties.Property("[BLS12-378] [Jacobian] Add should call double when having adding the same point", prop.ForAll(
		func(a, b fptower.E2) bool {
			// fop1 and fop2 represent the same point, so AddAssign must
			// internally fall back to doubling.
			fop1 := fuzzJacobianG2Affine(&g2Gen, a)
			fop2 := fuzzJacobianG2Affine(&g2Gen, b)
			var op1, op2 G2Jac
			op1.Set(&fop1).AddAssign(&fop2)
			op2.Double(&fop2)
			return op1.Equal(&op2)
		},
		GenE2(),
		GenE2(),
	))

	properties.Property("[BLS12-378] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll(
		func(a, b fptower.E2) bool {
			fop1 := fuzzJacobianG2Affine(&g2Gen, a)
			fop2 := fuzzJacobianG2Affine(&g2Gen, b)
			fop2.Neg(&fop2)
			fop1.AddAssign(&fop2)
			return fop1.Equal(&g2Infinity)
		},
		GenE2(),
		GenE2(),
	))

	properties.Property("[BLS12-378] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll(
		func(a fptower.E2) bool {
			// infinity must be neutral on both sides of the addition
			fop1 := fuzzJacobianG2Affine(&g2Gen, a)
			fop1.AddAssign(&g2Infinity)
			var op2 G2Jac
			op2.Set(&g2Infinity)
			op2.AddAssign(&g2Gen)
			return fop1.Equal(&g2Gen) && op2.Equal(&g2Gen)
		},
		GenE2(),
	))

	properties.Property("[BLS12-378] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll(
		func(a fptower.E2) bool {
			fop1 := fuzzJacobianG2Affine(&g2Gen, a)
			var p1, p1Neg G2Affine
			p1.FromJacobian(&fop1)
			p1Neg = p1
			p1Neg.Y.Neg(&p1Neg.Y)
			var o1, o2 g2JacExtended
			o1.addMixed(&p1Neg)
			o2.subMixed(&p1)

			return o1.X.Equal(&o2.X) &&
				o1.Y.Equal(&o2.Y) &&
				o1.ZZ.Equal(&o2.ZZ) &&
				o1.ZZZ.Equal(&o2.ZZZ)
		},
		GenE2(),
	))

	properties.Property("[BLS12-378] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll(
		func(a fptower.E2) bool {
			fop1 := fuzzJacobianG2Affine(&g2Gen, a)
			var p1, p1Neg G2Affine
			p1.FromJacobian(&fop1)
			p1Neg = p1
			p1Neg.Y.Neg(&p1Neg.Y)
			var o1, o2 g2JacExtended
			o1.doubleMixed(&p1Neg)
			o2.doubleNegMixed(&p1)

			return o1.X.Equal(&o2.X) &&
				o1.Y.Equal(&o2.Y) &&
				o1.ZZ.Equal(&o2.ZZ) &&
				o1.ZZZ.Equal(&o2.ZZZ)
		},
		GenE2(),
	))

	properties.Property("[BLS12-378] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll(
		func(a fptower.E2) bool {
			fop1 := fuzzJacobianG2Affine(&g2Gen, a)
			fop1.Neg(&fop1)
			var op2 G2Affine
			op2.FromJacobian(&g2Gen)
			fop1.AddMixed(&op2)
			return fop1.Equal(&g2Infinity)
		},
		GenE2(),
	))

	properties.Property("[BLS12-378] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll(
		func(s fr.Element) bool {

			r := fr.Modulus()
			var g G2Jac
			g.mulGLV(&g2Gen, r)

			var scalar, blindedScalar, rminusone big.Int
			var op1, op2, op3, gneg G2Jac
			rminusone.SetUint64(1).Sub(r, &rminusone)
			op3.mulWindowed(&g2Gen, &rminusone)
			gneg.Neg(&g2Gen)
			s.ToBigIntRegular(&scalar)
			// blindedScalar = scalar*r + scalar, congruent to scalar mod r
			blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar)
			op1.mulWindowed(&g2Gen, &scalar)
			op2.mulWindowed(&g2Gen, &blindedScalar)

			return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3)

		},
		genScalar,
	))

	properties.Property("[BLS12-378] psi should map points from E' to itself", prop.ForAll(
		func() bool {
			var a G2Jac
			a.psi(&g2Gen)
			return a.IsOnCurve() && !a.Equal(&g2Gen)
		},
	))

	properties.Property("[BLS12-378] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll(
		func(s fr.Element) bool {

			r := fr.Modulus()
			var g G2Jac
			g.mulGLV(&g2Gen, r)

			var scalar, blindedScalar, rminusone big.Int
			var op1, op2, op3, gneg G2Jac
			rminusone.SetUint64(1).Sub(r, &rminusone)
			op3.ScalarMultiplication(&g2Gen, &rminusone)
			gneg.Neg(&g2Gen)
			s.ToBigIntRegular(&scalar)
			blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar)
			op1.ScalarMultiplication(&g2Gen, &scalar)
			op2.ScalarMultiplication(&g2Gen, &blindedScalar)

			return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3)

		},
		genScalar,
	))

	properties.Property("[BLS12-378] GLV and Double and Add should output the same result", prop.ForAll(
		func(s fr.Element) bool {

			var r big.Int
			var op1, op2 G2Jac
			s.ToBigIntRegular(&r)
			op1.mulWindowed(&g2Gen, &r)
			op2.mulGLV(&g2Gen, &r)
			return op1.Equal(&op2) && !op1.Equal(&g2Infinity)

		},
		genScalar,
	))

	properties.TestingRun(t, gopter.ConsoleReporter(false))
}

// TestG2AffineCofactorCleaning checks that ClearCofactor maps a random curve
// point into the r-torsion subgroup.
func TestG2AffineCofactorCleaning(t *testing.T) {

	parameters := gopter.DefaultTestParameters()
	parameters.MinSuccessfulTests = 10

	properties := gopter.NewProperties(parameters)

	properties.Property("[BLS12-378] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll(
		func() bool {
			// sample random x-coordinates until x^3 + b' is a square,
			// then lift to a point on the twist
			var a, x, b fptower.E2
			a.SetRandom()

			x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff)
			for x.Legendre() != 1 {
				a.SetRandom()
				x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff)
			}

			b.Sqrt(&x)
			var point, pointCleared, infinity G2Jac
			point.X.Set(&a)
			point.Y.Set(&b)
			point.Z.SetOne()
			pointCleared.ClearCofactor(&point)
			infinity.Set(&g2Infinity)
			return
point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity)
		},
	))
	properties.TestingRun(t, gopter.ConsoleReporter(false))

}

// TestG2AffineBatchScalarMultiplication checks that BatchScalarMultiplicationG2
// agrees with individual GLV scalar multiplications on the same scalars.
func TestG2AffineBatchScalarMultiplication(t *testing.T) {

	parameters := gopter.DefaultTestParameters()
	parameters.MinSuccessfulTests = 10

	properties := gopter.NewProperties(parameters)

	genScalar := GenFr()

	// size of the multiExps
	const nbSamples = 10

	properties.Property("[BLS12-378] BatchScalarMultiplication should be consistant with individual scalar multiplications", prop.ForAll(
		func(mixer fr.Element) bool {
			// mixer ensures that all the words of a fpElement are set
			var sampleScalars [nbSamples]fr.Element

			for i := 1; i <= nbSamples; i++ {
				sampleScalars[i-1].SetUint64(uint64(i)).
					Mul(&sampleScalars[i-1], &mixer).
					FromMont()
			}

			result := BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:])

			if len(result) != len(sampleScalars) {
				return false
			}

			// each batch output must match the reference GLV multiplication
			for i := 0; i < len(result); i++ {
				var expectedJac G2Jac
				var expected G2Affine
				var b big.Int
				expectedJac.mulGLV(&g2Gen, sampleScalars[i].ToBigInt(&b))
				expected.FromJacobian(&expectedJac)
				if !result[i].Equal(&expected) {
					return false
				}
			}
			return true
		},
		genScalar,
	))

	properties.TestingRun(t, gopter.ConsoleReporter(false))
}

// ------------------------------------------------------------
// benches

func BenchmarkG2JacIsInSubGroup(b *testing.B) {
	var a G2Jac
	a.Set(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.IsInSubGroup()
	}

}

func BenchmarkG2AffineBatchScalarMul(b *testing.B) {
	// ensure every words of the scalars are filled
	var mixer fr.Element
	mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487")

	const pow = 15
	const nbSamples = 1 << pow

	var sampleScalars [nbSamples]fr.Element

	for i := 1; i <= nbSamples; i++ {
		sampleScalars[i-1].SetUint64(uint64(i)).
			Mul(&sampleScalars[i-1], &mixer).
			FromMont()
	}

	for i := 5; i <= pow; i++ {
		using := 1 << i

		b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) {
			b.ResetTimer()
			for j := 0; j < b.N; j++ {
				_ = BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:using])
			}
		})
	}
}

func BenchmarkG2JacScalarMul(b *testing.B) {

	var scalar big.Int
	r := fr.Modulus()
	scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10)
	scalar.Add(&scalar, r)

	var doubleAndAdd G2Jac

	b.Run("double and add", func(b *testing.B) {
		b.ResetTimer()
		for j := 0; j < b.N; j++ {
			doubleAndAdd.mulWindowed(&g2Gen, &scalar)
		}
	})

	var glv G2Jac
	b.Run("GLV", func(b *testing.B) {
		b.ResetTimer()
		for j := 0; j < b.N; j++ {
			glv.mulGLV(&g2Gen, &scalar)
		}
	})

}

func BenchmarkG2AffineCofactorClearing(b *testing.B) {
	var a G2Jac
	a.Set(&g2Gen)
	// exclude the setup above from the measurement, consistent with the
	// other benchmarks in this file
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.ClearCofactor(&a)
	}
}

func BenchmarkG2JacAdd(b *testing.B) {
	var a G2Jac
	a.Double(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.AddAssign(&g2Gen)
	}
}

func BenchmarkG2JacAddMixed(b *testing.B) {
	var a G2Jac
	a.Double(&g2Gen)

	var c G2Affine
	c.FromJacobian(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.AddMixed(&c)
	}

}

func BenchmarkG2JacDouble(b *testing.B) {
	var a G2Jac
	a.Set(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.DoubleAssign()
	}

}

func BenchmarkG2JacExtAddMixed(b *testing.B) {
	var a g2JacExtended
	a.doubleMixed(&g2GenAff)

	var c G2Affine
	c.FromJacobian(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.addMixed(&c)
	}
}

func BenchmarkG2JacExtSubMixed(b *testing.B) {
	var a g2JacExtended
	a.doubleMixed(&g2GenAff)

	var c G2Affine
	c.FromJacobian(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.subMixed(&c)
	}
}

func BenchmarkG2JacExtDoubleMixed(b *testing.B) {
	var a g2JacExtended
	a.doubleMixed(&g2GenAff)

	var c G2Affine
	c.FromJacobian(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.doubleMixed(&c)
	}
}

func BenchmarkG2JacExtDoubleNegMixed(b *testing.B) {
	var a g2JacExtended
	a.doubleMixed(&g2GenAff)

	var c G2Affine
	c.FromJacobian(&g2Gen)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.doubleNegMixed(&c)
	}
}

func BenchmarkG2JacExtAdd(b *testing.B) {
	var a, c g2JacExtended
	a.doubleMixed(&g2GenAff)
	c.double(&a)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.add(&c)
	}
}

func BenchmarkG2JacExtDouble(b *testing.B) {
	var a g2JacExtended
	a.doubleMixed(&g2GenAff)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.double(&a)
	}
}

// fuzzJacobianG2Affine returns a different Jacobian representative of p:
// (X*f^2, Y*f^3, Z*f) is the same projective class for any nonzero f.
func fuzzJacobianG2Affine(p *G2Jac, f fptower.E2) G2Jac {
	var res G2Jac
	res.X.Mul(&p.X, &f).Mul(&res.X, &f)
	res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f)
	res.Z.Mul(&p.Z, &f)
	return res
}

// fuzzExtendedJacobianG2Affine returns a different extended-Jacobian
// representative of p: (X*f^2, Y*f^3, ZZ*f^2, ZZZ*f^3).
func fuzzExtendedJacobianG2Affine(p *g2JacExtended, f fptower.E2) g2JacExtended {
	var res g2JacExtended
	var ff, fff fptower.E2
	ff.Square(&f)
	fff.Mul(&ff, &f)
	res.X.Mul(&p.X, &ff)
	res.Y.Mul(&p.Y, &fff)
	res.ZZ.Mul(&p.ZZ, &ff)
	res.ZZZ.Mul(&p.ZZZ, &fff)
	return res
}
diff --git a/ecc/bls12-378/hash_to_curve.go b/ecc/bls12-378/hash_to_curve.go
new file mode 100644
index 000000000..74326a85a
--- /dev/null
+++ b/ecc/bls12-378/hash_to_curve.go
@@ -0,0 +1,276 @@
// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +package bls12378 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +// hashToFp hashes msg to count prime field elements. +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 +func hashToFp(msg, dst []byte, count int) ([]fp.Element, error) { + + // 128 bits of security + // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 + L := 64 + + lenInBytes := count * L + pseudoRandomBytes, err := ecc.ExpandMsgXmd(msg, dst, lenInBytes) + if err != nil { + return nil, err + } + + res := make([]fp.Element, count) + for i := 0; i < count; i++ { + res[i].SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) + } + return res, nil +} + +// returns false if u>-u when seen as a bigInt +func sign0(u fp.Element) bool { + var a, b big.Int + u.ToBigIntRegular(&a) + u.Neg(&u) + u.ToBigIntRegular(&b) + return a.Cmp(&b) <= 0 +} + +// ---------------------------------------------------------------------------------------- +// G1Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG1(u fp.Element) G1Affine { + + var res G1Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fp.Element + z.SetOne() + c1.SetString("2") + c2.SetString("302624103037653085866624240790900480369923845885462456876760372017370467951700652388141901174418655585487141470208") + c3.SetString("287149757441151686413597772668332250774959033155223733277500818890482204710560507876289457345990628536787382328589") + c4.SetString("403498804050204114488832321054533973826565127847283275835680496023160623935600869850855868232558207447316188626942") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y 
fp.Element + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + tv4.Mul(&tv4, &c3) + x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bCurveCoeff) + e2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if e2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u) && sign0(y) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG1Svdw(t fp.Element) G1Affine { + res := svdwMapG1(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + t, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + res = MapToCurveG1Svdw(t[0]) + return res, nil +} + +// HashToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + u, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + Q0 := MapToCurveG1Svdw(u[0]) + Q1 := MapToCurveG1Svdw(u[1]) + var _Q0, 
_Q1, _res G1Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} + +// ---------------------------------------------------------------------------------------- +// G2Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG2(u fptower.E2) G2Affine { + + var res G2Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fptower.E2 + z.A0.SetOne() + z.A1.SetOne() + c1.A0.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940403") + c1.A1.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940416") + c2.A0.SetString("302624103037653085866624240790900480369923845885462456876760372017370467951700652388141901174418655585487141470208") + c2.A1.SetString("302624103037653085866624240790900480369923845885462456876760372017370467951700652388141901174418655585487141470208") + c3.A0.SetString("296552843788751288906244499216725356684281694271241895700730864223961612014909088554048735457137528455181151573749") + c3.A1.SetString("181388265705333345538985517067130917207305732282979825233670477511990909086507141331244586890249042878909613862256") + c4.A0.SetString("224166002250113396938240178363629985459202848804046264353155831123978124408667149917142149018087893026286771459412") + c4.A1.SetString("313832403150158755713536249709081979642883988325664770094418163573569374172134009883999008625323050236801480043178") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y fptower.E2 + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + 
tv4.Mul(&tv4, &c3) + x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bTwistCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bTwistCurveCoeff) + e2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if e2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bTwistCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u.A0) && sign0(y.A0) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG2Svdw(t fptower.E2) G2Affine { + res := svdwMapG2(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + _t, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + var t fptower.E2 + t.A0.Set(&_t[0]) + t.A1.Set(&_t[1]) + res = MapToCurveG2Svdw(t) + return res, nil +} + +// HashToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + u, err := hashToFp(msg, dst, 4) + if err != nil { + return res, err + } + var u0, u1 fptower.E2 + u0.A0.Set(&u[0]) + u0.A1.Set(&u[1]) + u1.A0.Set(&u[2]) + u1.A1.Set(&u[3]) + Q0 := MapToCurveG2Svdw(u0) + Q1 := MapToCurveG2Svdw(u1) + var _Q0, _Q1, _res 
G2Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} diff --git a/ecc/bls12-378/internal/fptower/asm.go b/ecc/bls12-378/internal/fptower/asm.go new file mode 100644 index 000000000..0ec192019 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/asm.go @@ -0,0 +1,28 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import "golang.org/x/sys/cpu" + +// supportAdx will be set only on amd64 that has MULX and ADDX instructions +var ( + supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 + _ = supportAdx // used in asm +) diff --git a/ecc/bls12-378/internal/fptower/asm_noadx.go b/ecc/bls12-378/internal/fptower/asm_noadx.go new file mode 100644 index 000000000..6a09c11c4 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bls12-378/internal/fptower/e12.go b/ecc/bls12-378/internal/fptower/e12.go new file mode 100644 index 000000000..aea14150d --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e12.go @@ -0,0 +1,561 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "encoding/binary" + "errors" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "math/big" +) + +// E12 is a degree two finite field extension of fp6 +type E12 struct { + C0, C1 E6 +} + +// Equal returns true if z equals x, fasle otherwise +func (z *E12) Equal(x *E12) bool { + return z.C0.Equal(&x.C0) && z.C1.Equal(&x.C1) +} + +// String puts E12 in string form +func (z *E12) String() string { + return (z.C0.String() + "+(" + z.C1.String() + ")*w") +} + +// SetString sets a E12 from string +func (z *E12) SetString(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11 string) *E12 { + z.C0.SetString(s0, s1, s2, s3, s4, s5) + z.C1.SetString(s6, s7, s8, s9, s10, s11) + return z +} + +// Set copies x into z and returns z +func (z *E12) Set(x *E12) *E12 { + z.C0 = x.C0 + z.C1 = x.C1 + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E12) SetOne() *E12 { + *z = E12{} + z.C0.B0.A0.SetOne() + return z +} + +// ToMont converts to Mont form +func (z *E12) ToMont() *E12 { + z.C0.ToMont() + z.C1.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E12) FromMont() *E12 { + z.C0.FromMont() + z.C1.FromMont() + return z +} + +// Add set z=x+y in E12 and return z +func (z *E12) Add(x, y *E12) *E12 { + z.C0.Add(&x.C0, &y.C0) + z.C1.Add(&x.C1, &y.C1) + return z +} + +// Sub sets z to x sub y and return z +func (z *E12) Sub(x, y *E12) *E12 { + z.C0.Sub(&x.C0, &y.C0) + z.C1.Sub(&x.C1, &y.C1) + return z +} + +// Double sets z=2*x and returns z +func (z *E12) Double(x *E12) *E12 { + z.C0.Double(&x.C0) + z.C1.Double(&x.C1) + return z +} + +// SetRandom used only in tests +func (z *E12) SetRandom() (*E12, error) { + if _, err := z.C0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.C1.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// Mul set z=x*y in E12 and return z +func (z *E12) Mul(x, y *E12) *E12 { + var 
a, b, c E6 + a.Add(&x.C0, &x.C1) + b.Add(&y.C0, &y.C1) + a.Mul(&a, &b) + b.Mul(&x.C0, &y.C0) + c.Mul(&x.C1, &y.C1) + z.C1.Sub(&a, &b).Sub(&z.C1, &c) + z.C0.MulByNonResidue(&c).Add(&z.C0, &b) + return z +} + +// Square set z=x*x in E12 and return z +func (z *E12) Square(x *E12) *E12 { + + //Algorithm 22 from https://eprint.iacr.org/2010/354.pdf + var c0, c2, c3 E6 + c0.Sub(&x.C0, &x.C1) + c3.MulByNonResidue(&x.C1).Neg(&c3).Add(&x.C0, &c3) + c2.Mul(&x.C0, &x.C1) + c0.Mul(&c0, &c3).Add(&c0, &c2) + z.C1.Double(&c2) + c2.MulByNonResidue(&c2) + z.C0.Add(&c0, &c2) + + return z +} + +// Karabina's compressed cyclotomic square +// https://eprint.iacr.org/2010/542.pdf +// Th. 3.2 with minor modifications to fit our tower +func (z *E12) CyclotomicSquareCompressed(x *E12) *E12 { + + var t [7]E2 + + // t0 = g1^2 + t[0].Square(&x.C0.B1) + // t1 = g5^2 + t[1].Square(&x.C1.B2) + // t5 = g1 + g5 + t[5].Add(&x.C0.B1, &x.C1.B2) + // t2 = (g1 + g5)^2 + t[2].Square(&t[5]) + + // t3 = g1^2 + g5^2 + t[3].Add(&t[0], &t[1]) + // t5 = 2 * g1 * g5 + t[5].Sub(&t[2], &t[3]) + + // t6 = g3 + g2 + t[6].Add(&x.C1.B0, &x.C0.B2) + // t3 = (g3 + g2)^2 + t[3].Square(&t[6]) + // t2 = g3^2 + t[2].Square(&x.C1.B0) + + // t6 = 2 * nr * g1 * g5 + t[6].MulByNonResidue(&t[5]) + // t5 = 4 * nr * g1 * g5 + 2 * g3 + t[5].Add(&t[6], &x.C1.B0). 
+ Double(&t[5]) + // z3 = 6 * nr * g1 * g5 + 2 * g3 + z.C1.B0.Add(&t[5], &t[6]) + + // t4 = nr * g5^2 + t[4].MulByNonResidue(&t[1]) + // t5 = nr * g5^2 + g1^2 + t[5].Add(&t[0], &t[4]) + // t6 = nr * g5^2 + g1^2 - g2 + t[6].Sub(&t[5], &x.C0.B2) + + // t1 = g2^2 + t[1].Square(&x.C0.B2) + + // t6 = 2 * nr * g5^2 + 2 * g1^2 - 2*g2 + t[6].Double(&t[6]) + // z2 = 3 * nr * g5^2 + 3 * g1^2 - 2*g2 + z.C0.B2.Add(&t[6], &t[5]) + + // t4 = nr * g2^2 + t[4].MulByNonResidue(&t[1]) + // t5 = g3^2 + nr * g2^2 + t[5].Add(&t[2], &t[4]) + // t6 = g3^2 + nr * g2^2 - g1 + t[6].Sub(&t[5], &x.C0.B1) + // t6 = 2 * g3^2 + 2 * nr * g2^2 - 2 * g1 + t[6].Double(&t[6]) + // z1 = 3 * g3^2 + 3 * nr * g2^2 - 2 * g1 + z.C0.B1.Add(&t[6], &t[5]) + + // t0 = g2^2 + g3^2 + t[0].Add(&t[2], &t[1]) + // t5 = 2 * g3 * g2 + t[5].Sub(&t[3], &t[0]) + // t6 = 2 * g3 * g2 + g5 + t[6].Add(&t[5], &x.C1.B2) + // t6 = 4 * g3 * g2 + 2 * g5 + t[6].Double(&t[6]) + // z5 = 6 * g3 * g2 + 2 * g5 + z.C1.B2.Add(&t[5], &t[6]) + + return z +} + +// Decompress Karabina's cyclotomic square result +func (z *E12) Decompress(x *E12) *E12 { + + var t [3]E2 + var one E2 + one.SetOne() + + // t0 = g1^2 + t[0].Square(&x.C0.B1) + // t1 = 3 * g1^2 - 2 * g2 + t[1].Sub(&t[0], &x.C0.B2). + Double(&t[1]). + Add(&t[1], &t[0]) + // t0 = E * g5^2 + t1 + t[2].Square(&x.C1.B2) + t[0].MulByNonResidue(&t[2]). + Add(&t[0], &t[1]) + // t1 = 1/(4 * g3) + t[1].Double(&x.C1.B0). + Double(&t[1]). + Inverse(&t[1]) // costly + // z4 = g4 + z.C1.B1.Mul(&t[0], &t[1]) + + // t1 = g2 * g1 + t[1].Mul(&x.C0.B2, &x.C0.B1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t[2].Square(&z.C1.B1). + Sub(&t[2], &t[1]). + Double(&t[2]). + Sub(&t[2], &t[1]) + // t1 = g3 * g5 + t[1].Mul(&x.C1.B0, &x.C1.B2) + // c_0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t[2].Add(&t[2], &t[1]) + z.C0.B0.MulByNonResidue(&t[2]). 
+ Add(&z.C0.B0, &one) + + z.C0.B1.Set(&x.C0.B1) + z.C0.B2.Set(&x.C0.B2) + z.C1.B0.Set(&x.C1.B0) + z.C1.B2.Set(&x.C1.B2) + + return z +} + +// BatchDecompress multiple Karabina's cyclotomic square results +func BatchDecompress(x []E12) []E12 { + + n := len(x) + if n == 0 { + return x + } + + t0 := make([]E2, n) + t1 := make([]E2, n) + t2 := make([]E2, n) + + var one E2 + one.SetOne() + + for i := 0; i < n; i++ { + // t0 = g1^2 + t0[i].Square(&x[i].C0.B1) + // t1 = 3 * g1^2 - 2 * g2 + t1[i].Sub(&t0[i], &x[i].C0.B2). + Double(&t1[i]). + Add(&t1[i], &t0[i]) + // t0 = E * g5^2 + t1 + t2[i].Square(&x[i].C1.B2) + t0[i].MulByNonResidue(&t2[i]). + Add(&t0[i], &t1[i]) + // t1 = 4 * g3 + t1[i].Double(&x[i].C1.B0). + Double(&t1[i]) + } + + t1 = BatchInvert(t1) // costs 1 inverse + + for i := 0; i < n; i++ { + // z4 = g4 + x[i].C1.B1.Mul(&t0[i], &t1[i]) + + // t1 = g2 * g1 + t1[i].Mul(&x[i].C0.B2, &x[i].C0.B1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t2[i].Square(&x[i].C1.B1) + t2[i].Sub(&t2[i], &t1[i]) + t2[i].Double(&t2[i]) + t2[i].Sub(&t2[i], &t1[i]) + + // t1 = g3 * g5 + t1[i].Mul(&x[i].C1.B0, &x[i].C1.B2) + // z0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t2[i].Add(&t2[i], &t1[i]) + x[i].C0.B0.MulByNonResidue(&t2[i]). 
+ Add(&x[i].C0.B0, &one) + } + + return x +} + +// Granger-Scott's cyclotomic square +// https://eprint.iacr.org/2009/565.pdf, 3.2 +func (z *E12) CyclotomicSquare(x *E12) *E12 { + + // x=(x0,x1,x2,x3,x4,x5,x6,x7) in E2^6 + // cyclosquare(x)=(3*x4^2*u + 3*x0^2 - 2*x0, + // 3*x2^2*u + 3*x3^2 - 2*x1, + // 3*x5^2*u + 3*x1^2 - 2*x2, + // 6*x1*x5*u + 2*x3, + // 6*x0*x4 + 2*x4, + // 6*x2*x3 + 2*x5) + + var t [9]E2 + + t[0].Square(&x.C1.B1) + t[1].Square(&x.C0.B0) + t[6].Add(&x.C1.B1, &x.C0.B0).Square(&t[6]).Sub(&t[6], &t[0]).Sub(&t[6], &t[1]) // 2*x4*x0 + t[2].Square(&x.C0.B2) + t[3].Square(&x.C1.B0) + t[7].Add(&x.C0.B2, &x.C1.B0).Square(&t[7]).Sub(&t[7], &t[2]).Sub(&t[7], &t[3]) // 2*x2*x3 + t[4].Square(&x.C1.B2) + t[5].Square(&x.C0.B1) + t[8].Add(&x.C1.B2, &x.C0.B1).Square(&t[8]).Sub(&t[8], &t[4]).Sub(&t[8], &t[5]).MulByNonResidue(&t[8]) // 2*x5*x1*u + + t[0].MulByNonResidue(&t[0]).Add(&t[0], &t[1]) // x4^2*u + x0^2 + t[2].MulByNonResidue(&t[2]).Add(&t[2], &t[3]) // x2^2*u + x3^2 + t[4].MulByNonResidue(&t[4]).Add(&t[4], &t[5]) // x5^2*u + x1^2 + + z.C0.B0.Sub(&t[0], &x.C0.B0).Double(&z.C0.B0).Add(&z.C0.B0, &t[0]) + z.C0.B1.Sub(&t[2], &x.C0.B1).Double(&z.C0.B1).Add(&z.C0.B1, &t[2]) + z.C0.B2.Sub(&t[4], &x.C0.B2).Double(&z.C0.B2).Add(&z.C0.B2, &t[4]) + + z.C1.B0.Add(&t[8], &x.C1.B0).Double(&z.C1.B0).Add(&z.C1.B0, &t[8]) + z.C1.B1.Add(&t[6], &x.C1.B1).Double(&z.C1.B1).Add(&z.C1.B1, &t[6]) + z.C1.B2.Add(&t[7], &x.C1.B2).Double(&z.C1.B2).Add(&z.C1.B2, &t[7]) + + return z +} + +// Inverse set z to the inverse of x in E12 and return z +func (z *E12) Inverse(x *E12) *E12 { + // Algorithm 23 from https://eprint.iacr.org/2010/354.pdf + + var t0, t1, tmp E6 + t0.Square(&x.C0) + t1.Square(&x.C1) + tmp.MulByNonResidue(&t1) + t0.Sub(&t0, &tmp) + t1.Inverse(&t0) + z.C0.Mul(&x.C0, &t1) + z.C1.Mul(&x.C1, &t1).Neg(&z.C1) + + return z +} + +// Exp sets z=x**e and returns it +func (z *E12) Exp(x *E12, e big.Int) *E12 { + var res E12 + res.SetOne() + b := e.Bytes() + for i := range b { + w 
:= b[i] + mask := byte(0x80) + for j := 7; j >= 0; j-- { + res.Square(&res) + if (w&mask)>>j != 0 { + res.Mul(&res, x) + } + mask = mask >> 1 + } + } + z.Set(&res) + return z +} + +// InverseUnitary inverse a unitary element +func (z *E12) InverseUnitary(x *E12) *E12 { + return z.Conjugate(x) +} + +// Conjugate set z to x conjugated and return z +func (z *E12) Conjugate(x *E12) *E12 { + *z = *x + z.C1.Neg(&z.C1) + return z +} + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = 48 * 12 + +// Marshal converts z to a byte slice +func (z *E12) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (z *E12) Unmarshal(buf []byte) error { + return z.SetBytes(buf) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... +func (z *E12) Bytes() (r [SizeOfGT]byte) { + _z := *z + _z.FromMont() + binary.BigEndian.PutUint64(r[568:576], _z.C0.B0.A0[0]) + binary.BigEndian.PutUint64(r[560:568], _z.C0.B0.A0[1]) + binary.BigEndian.PutUint64(r[552:560], _z.C0.B0.A0[2]) + binary.BigEndian.PutUint64(r[544:552], _z.C0.B0.A0[3]) + binary.BigEndian.PutUint64(r[536:544], _z.C0.B0.A0[4]) + binary.BigEndian.PutUint64(r[528:536], _z.C0.B0.A0[5]) + + binary.BigEndian.PutUint64(r[520:528], _z.C0.B0.A1[0]) + binary.BigEndian.PutUint64(r[512:520], _z.C0.B0.A1[1]) + binary.BigEndian.PutUint64(r[504:512], _z.C0.B0.A1[2]) + binary.BigEndian.PutUint64(r[496:504], _z.C0.B0.A1[3]) + binary.BigEndian.PutUint64(r[488:496], _z.C0.B0.A1[4]) + binary.BigEndian.PutUint64(r[480:488], _z.C0.B0.A1[5]) + + binary.BigEndian.PutUint64(r[472:480], _z.C0.B1.A0[0]) + binary.BigEndian.PutUint64(r[464:472], _z.C0.B1.A0[1]) + binary.BigEndian.PutUint64(r[456:464], _z.C0.B1.A0[2]) + binary.BigEndian.PutUint64(r[448:456], _z.C0.B1.A0[3]) + binary.BigEndian.PutUint64(r[440:448], _z.C0.B1.A0[4]) + binary.BigEndian.PutUint64(r[432:440], 
_z.C0.B1.A0[5]) + + binary.BigEndian.PutUint64(r[424:432], _z.C0.B1.A1[0]) + binary.BigEndian.PutUint64(r[416:424], _z.C0.B1.A1[1]) + binary.BigEndian.PutUint64(r[408:416], _z.C0.B1.A1[2]) + binary.BigEndian.PutUint64(r[400:408], _z.C0.B1.A1[3]) + binary.BigEndian.PutUint64(r[392:400], _z.C0.B1.A1[4]) + binary.BigEndian.PutUint64(r[384:392], _z.C0.B1.A1[5]) + + binary.BigEndian.PutUint64(r[376:384], _z.C0.B2.A0[0]) + binary.BigEndian.PutUint64(r[368:376], _z.C0.B2.A0[1]) + binary.BigEndian.PutUint64(r[360:368], _z.C0.B2.A0[2]) + binary.BigEndian.PutUint64(r[352:360], _z.C0.B2.A0[3]) + binary.BigEndian.PutUint64(r[344:352], _z.C0.B2.A0[4]) + binary.BigEndian.PutUint64(r[336:344], _z.C0.B2.A0[5]) + + binary.BigEndian.PutUint64(r[328:336], _z.C0.B2.A1[0]) + binary.BigEndian.PutUint64(r[320:328], _z.C0.B2.A1[1]) + binary.BigEndian.PutUint64(r[312:320], _z.C0.B2.A1[2]) + binary.BigEndian.PutUint64(r[304:312], _z.C0.B2.A1[3]) + binary.BigEndian.PutUint64(r[296:304], _z.C0.B2.A1[4]) + binary.BigEndian.PutUint64(r[288:296], _z.C0.B2.A1[5]) + + binary.BigEndian.PutUint64(r[280:288], _z.C1.B0.A0[0]) + binary.BigEndian.PutUint64(r[272:280], _z.C1.B0.A0[1]) + binary.BigEndian.PutUint64(r[264:272], _z.C1.B0.A0[2]) + binary.BigEndian.PutUint64(r[256:264], _z.C1.B0.A0[3]) + binary.BigEndian.PutUint64(r[248:256], _z.C1.B0.A0[4]) + binary.BigEndian.PutUint64(r[240:248], _z.C1.B0.A0[5]) + + binary.BigEndian.PutUint64(r[232:240], _z.C1.B0.A1[0]) + binary.BigEndian.PutUint64(r[224:232], _z.C1.B0.A1[1]) + binary.BigEndian.PutUint64(r[216:224], _z.C1.B0.A1[2]) + binary.BigEndian.PutUint64(r[208:216], _z.C1.B0.A1[3]) + binary.BigEndian.PutUint64(r[200:208], _z.C1.B0.A1[4]) + binary.BigEndian.PutUint64(r[192:200], _z.C1.B0.A1[5]) + + binary.BigEndian.PutUint64(r[184:192], _z.C1.B1.A0[0]) + binary.BigEndian.PutUint64(r[176:184], _z.C1.B1.A0[1]) + binary.BigEndian.PutUint64(r[168:176], _z.C1.B1.A0[2]) + binary.BigEndian.PutUint64(r[160:168], _z.C1.B1.A0[3]) + 
binary.BigEndian.PutUint64(r[152:160], _z.C1.B1.A0[4]) + binary.BigEndian.PutUint64(r[144:152], _z.C1.B1.A0[5]) + + binary.BigEndian.PutUint64(r[136:144], _z.C1.B1.A1[0]) + binary.BigEndian.PutUint64(r[128:136], _z.C1.B1.A1[1]) + binary.BigEndian.PutUint64(r[120:128], _z.C1.B1.A1[2]) + binary.BigEndian.PutUint64(r[112:120], _z.C1.B1.A1[3]) + binary.BigEndian.PutUint64(r[104:112], _z.C1.B1.A1[4]) + binary.BigEndian.PutUint64(r[96:104], _z.C1.B1.A1[5]) + + binary.BigEndian.PutUint64(r[88:96], _z.C1.B2.A0[0]) + binary.BigEndian.PutUint64(r[80:88], _z.C1.B2.A0[1]) + binary.BigEndian.PutUint64(r[72:80], _z.C1.B2.A0[2]) + binary.BigEndian.PutUint64(r[64:72], _z.C1.B2.A0[3]) + binary.BigEndian.PutUint64(r[56:64], _z.C1.B2.A0[4]) + binary.BigEndian.PutUint64(r[48:56], _z.C1.B2.A0[5]) + + binary.BigEndian.PutUint64(r[40:48], _z.C1.B2.A1[0]) + binary.BigEndian.PutUint64(r[32:40], _z.C1.B2.A1[1]) + binary.BigEndian.PutUint64(r[24:32], _z.C1.B2.A1[2]) + binary.BigEndian.PutUint64(r[16:24], _z.C1.B2.A1[3]) + binary.BigEndian.PutUint64(r[8:16], _z.C1.B2.A1[4]) + binary.BigEndian.PutUint64(r[0:8], _z.C1.B2.A1[5]) + + return +} + +// SetBytes interprets e as the bytes of a big-endian GT +// sets z to that value (in Montgomery form), and returns z. +// size(e) == 48 * 12 +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... 
func (z *E12) SetBytes(e []byte) error {
	if len(e) != SizeOfGT {
		return errors.New("invalid buffer size")
	}
	// big-endian layout, mirror of Bytes():
	// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... | z.C0.B0.A0
	z.C0.B0.A0.SetBytes(e[528 : 528+fp.Bytes])

	z.C0.B0.A1.SetBytes(e[480 : 480+fp.Bytes])

	z.C0.B1.A0.SetBytes(e[432 : 432+fp.Bytes])

	z.C0.B1.A1.SetBytes(e[384 : 384+fp.Bytes])

	z.C0.B2.A0.SetBytes(e[336 : 336+fp.Bytes])

	z.C0.B2.A1.SetBytes(e[288 : 288+fp.Bytes])

	z.C1.B0.A0.SetBytes(e[240 : 240+fp.Bytes])

	z.C1.B0.A1.SetBytes(e[192 : 192+fp.Bytes])

	z.C1.B1.A0.SetBytes(e[144 : 144+fp.Bytes])

	z.C1.B1.A1.SetBytes(e[96 : 96+fp.Bytes])

	z.C1.B2.A0.SetBytes(e[48 : 48+fp.Bytes])

	z.C1.B2.A1.SetBytes(e[0 : 0+fp.Bytes])

	return nil
}

// IsInSubGroup ensures GT/E12 is in the correct subgroup
func (z *E12) IsInSubGroup() bool {
	var a, b E12

	// check z^(Phi_k(p)) == 1, i.e. z^(p^4 - p^2 + 1) == 1
	// (membership in the cyclotomic subgroup)
	a.FrobeniusSquare(z)
	b.FrobeniusSquare(&a).Mul(&b, z)

	if !a.Equal(&b) {
		return false
	}

	// check z^(p+1-t) == 1
	a.Frobenius(z)
	b.Expt(z)

	return a.Equal(&b)
}

// ===== file: ecc/bls12-378/internal/fptower/e12_pairing.go =====

package fptower

// nSquare sets z to z^(2^n) using n cyclotomic squarings.
func (z *E12) nSquare(n int) {
	for i := 0; i < n; i++ {
		z.CyclotomicSquare(z)
	}
}

// nSquareCompressed sets z to z^(2^n) using n compressed (Karabina)
// cyclotomic squarings; the result must be Decompress-ed before use.
func (z *E12) nSquareCompressed(n int) {
	for i := 0; i < n; i++ {
		z.CyclotomicSquareCompressed(z)
	}
}

// Expt sets z to x^t in E12 and returns z,
// where t = 0x9948a20000000001 (see the final step below).
func (z *E12) Expt(x *E12) *E12 {

	// Expt computation is derived from the addition chain:
	//
	//	_1000     = 1 << 3
	//	_1001     = 1 + _1000
	//	_1001000  = _1001 << 3
	//	_1010001  = _1001 + _1001000
	//	_10011001 = _1001000 + _1010001
	//	i67       = ((_10011001 << 5 + _1001) << 10 + _1010001) << 41
	//	return      1 + i67
	//
	// Operations: 62 squares 6 multiplies
	//
	// Generated by github.com/mmcloughlin/addchain v0.4.0.

	// Allocate Temporaries.
	var result, t0, t1 E12

	// Step 3: result = x^0x8
	result.CyclotomicSquare(x)
	result.nSquare(2)

	// Step 4: t0 = x^0x9
	t0.Mul(x, &result)

	// Step 7: t1 = x^0x48
	t1.CyclotomicSquare(&t0)
	t1.nSquare(2)

	// Step 8: result = x^0x51
	result.Mul(&t0, &t1)

	// Step 9: t1 = x^0x99
	t1.Mul(&t1, &result)

	// Step 14: t1 = x^0x1320
	t1.nSquare(5)

	// Step 15: t0 = x^0x1329
	t0.Mul(&t0, &t1)

	// Step 25: t0 = x^0x4ca400
	t0.nSquare(10)

	// Step 26: result = x^0x4ca451
	result.Mul(&result, &t0)

	// Step 67: result = x^0x9948a20000000000
	result.nSquareCompressed(41)
	result.Decompress(&result)

	// Step 68: result = x^0x9948a20000000001
	z.Mul(x, &result)

	return z
}

// MulBy014 multiplication by sparse element (c0, c1, 0, 0, c4)
func (z *E12) MulBy014(c0, c1, c4 *E2) *E12 {

	var a, b E6
	var d E2

	a.Set(&z.C0)
	a.MulBy01(c0, c1)

	b.Set(&z.C1)
	b.MulBy1(c4)
	d.Add(c1, c4)

	// Karatsuba-style recombination of the two sparse half-products
	z.C1.Add(&z.C1, &z.C0)
	z.C1.MulBy01(c0, &d)
	z.C1.Sub(&z.C1, &a)
	z.C1.Sub(&z.C1, &b)
	z.C0.MulByNonResidue(&b)
	z.C0.Add(&z.C0, &a)

	return z
}

// Mul014By014 multiplication of sparse element (c0,c1,0,0,c4,0) by sparse element (d0,d1,0,0,d4,0)
func (z *E12) Mul014By014(d0, d1, d4, c0, c1, c4 *E2) *E12 {
	var tmp, x0, x1, x4, x04, x01, x14 E2
	x0.Mul(c0, d0)
	x1.Mul(c1, d1)
	x4.Mul(c4, d4)
	tmp.Add(c0, c4)
	x04.Add(d0, d4).
		Mul(&x04, &tmp).
		Sub(&x04, &x0).
		Sub(&x04, &x4)
	tmp.Add(c0, c1)
	x01.Add(d0, d1).
		Mul(&x01, &tmp).
		Sub(&x01, &x0).
		Sub(&x01, &x1)
	tmp.Add(c1, c4)
	x14.Add(d1, d4).
		Mul(&x14, &tmp).
		Sub(&x14, &x1).
		Sub(&x14, &x4)

	z.C0.B0.MulByNonResidue(&x4).
		Add(&z.C0.B0, &x0)
	z.C0.B1.Set(&x01)
	z.C0.B2.Set(&x1)
	z.C1.B0.SetZero()
	z.C1.B1.Set(&x04)
	z.C1.B2.Set(&x14)

	return z
}

// ===== file: ecc/bls12-378/internal/fptower/e12_test.go =====

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by consensys/gnark-crypto DO NOT EDIT

package fptower

import (
	"testing"

	"github.com/consensys/gnark-crypto/ecc/bls12-378/fp"
	"github.com/leanovate/gopter"
	"github.com/leanovate/gopter/prop"
)

// ------------------------------------------------------------
// tests

func TestE12Serialization(t *testing.T) {

	parameters := gopter.DefaultTestParameters()
	parameters.MinSuccessfulTests = 100

	properties := gopter.NewProperties(parameters)

	genA := GenE12()

	properties.Property("[BLS12-378] SetBytes(Bytes()) should stay constant", prop.ForAll(
		func(a *E12) bool {
			var b E12
			buf := a.Bytes()
			if err := b.SetBytes(buf[:]); err != nil {
				return false
			}
			return a.Equal(&b)
		},
		genA,
	))

	properties.TestingRun(t, gopter.ConsoleReporter(false))
}

func TestE12ReceiverIsOperand(t *testing.T) {

	parameters := gopter.DefaultTestParameters()
	parameters.MinSuccessfulTests = 100

	properties := gopter.NewProperties(parameters)

	genA := GenE12()
	genB := GenE12()

	properties.Property("[BLS12-378] Having the receiver as operand (addition) should output the same result", prop.ForAll(
		func(a, b *E12) bool {
			var c, d E12
			d.Set(a)
			c.Add(a, b)
			a.Add(a, b)
			b.Add(&d, b)
			return a.Equal(b) && a.Equal(&c) && b.Equal(&c)
		},
		genA,
		genB,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (sub) should output the same result", prop.ForAll(
		func(a, b *E12) bool {
			var c, d E12
			d.Set(a)
			c.Sub(a, b)
			a.Sub(a, b)
			b.Sub(&d, b)
			return a.Equal(b) && a.Equal(&c) && b.Equal(&c)
		},
		genA,
		genB,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (mul) should output the same result", prop.ForAll(
		func(a, b *E12) bool {
			var c, d E12
			d.Set(a)
			c.Mul(a, b)
			a.Mul(a, b)
			b.Mul(&d, b)
			return a.Equal(b) && a.Equal(&c) && b.Equal(&c)
		},
		genA,
		genB,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (square) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Square(a)
			a.Square(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (double) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Double(a)
			a.Double(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (Inverse) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Inverse(a)
			a.Inverse(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (Cyclotomic square) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.CyclotomicSquare(a)
			a.CyclotomicSquare(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Conjugate(a)
			a.Conjugate(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (Frobenius) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Frobenius(a)
			a.Frobenius(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (FrobeniusSquare) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.FrobeniusSquare(a)
			a.FrobeniusSquare(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] Having the receiver as operand (FrobeniusCube) should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.FrobeniusCube(a)
			a.FrobeniusCube(a)
			return a.Equal(&b)
		},
		genA,
	))

	properties.TestingRun(t, gopter.ConsoleReporter(false))
}

func TestE12Ops(t *testing.T) {

	parameters := gopter.DefaultTestParameters()
	parameters.MinSuccessfulTests = 100

	properties := gopter.NewProperties(parameters)

	genA := GenE12()
	genB := GenE12()

	properties.Property("[BLS12-378] sub & add should leave an element invariant", prop.ForAll(
		func(a, b *E12) bool {
			var c E12
			c.Set(a)
			c.Add(&c, b).Sub(&c, b)
			return c.Equal(a)
		},
		genA,
		genB,
	))

	properties.Property("[BLS12-378] mul & inverse should leave an element invariant", prop.ForAll(
		func(a, b *E12) bool {
			var c, d E12
			d.Inverse(b)
			c.Set(a)
			c.Mul(&c, b).Mul(&c, &d)
			return c.Equal(a)
		},
		genA,
		genB,
	))

	properties.Property("[BLS12-378] inverse twice should leave an element invariant", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Inverse(a).Inverse(&b)
			return a.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] square and mul should output the same result", prop.ForAll(
		func(a *E12) bool {
			var b, c E12
			b.Mul(a, a)
			c.Square(a)
			return b.Equal(&c)
		},
		genA,
	))

	properties.Property("[BLS12-378] a + pi(a), a-pi(a) should be real", prop.ForAll(
		func(a *E12) bool {
			var b, c, d E12
			var e, f, g E6
			b.Conjugate(a)
			c.Add(a, &b)
			d.Sub(a, &b)
			e.Double(&a.C0)
			f.Double(&a.C1)
			// g stays zero: a+conj(a) has zero C1, a-conj(a) has zero C0
			return c.C1.Equal(&g) && d.C0.Equal(&g) && e.Equal(&c.C0) && f.Equal(&d.C1)
		},
		genA,
	))

	properties.Property("[BLS12-378] pi**12=id", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.Frobenius(a).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b).
				Frobenius(&b)
			return b.Equal(a)
		},
		genA,
	))

	properties.Property("[BLS12-378] (pi**2)**6=id", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.FrobeniusSquare(a).
				FrobeniusSquare(&b).
				FrobeniusSquare(&b).
				FrobeniusSquare(&b).
				FrobeniusSquare(&b).
				FrobeniusSquare(&b)
			return b.Equal(a)
		},
		genA,
	))

	properties.Property("[BLS12-378] (pi**3)**4=id", prop.ForAll(
		func(a *E12) bool {
			var b E12
			b.FrobeniusCube(a).
				FrobeniusCube(&b).
				FrobeniusCube(&b).
				FrobeniusCube(&b)
			return b.Equal(a)
		},
		genA,
	))

	properties.Property("[BLS12-378] cyclotomic square (Granger-Scott) and square should be the same in the cyclotomic subgroup", prop.ForAll(
		func(a *E12) bool {
			var b, c, d E12
			// map a into the cyclotomic subgroup: a -> a^(p^6-1)(p^2+1)
			b.Conjugate(a)
			a.Inverse(a)
			b.Mul(&b, a)
			a.FrobeniusSquare(&b).Mul(a, &b)
			c.Square(a)
			d.CyclotomicSquare(a)
			return c.Equal(&d)
		},
		genA,
	))

	properties.Property("[BLS12-378] compressed cyclotomic square (Karabina) and square should be the same in the cyclotomic subgroup", prop.ForAll(
		func(a *E12) bool {
			var b, c, d E12
			b.Conjugate(a)
			a.Inverse(a)
			b.Mul(&b, a)
			a.FrobeniusSquare(&b).Mul(a, &b)
			c.Square(a)
			d.CyclotomicSquareCompressed(a).Decompress(&d)
			return c.Equal(&d)
		},
		genA,
	))

	properties.Property("[BLS12-378] batch decompress and individual decompress (Karabina) should be the same", prop.ForAll(
		func(a *E12) bool {
			var b E12
			// put in the cyclotomic subgroup
			b.Conjugate(a)
			a.Inverse(a)
			b.Mul(&b, a)
			a.FrobeniusSquare(&b).Mul(a, &b)

			var a2, a4, a17 E12
			a2.Set(a)
			a4.Set(a)
			a17.Set(a)
			a2.nSquareCompressed(2)
			a4.nSquareCompressed(4)
			a17.nSquareCompressed(17)
			batch := BatchDecompress([]E12{a2, a4, a17})
			a2.Decompress(&a2)
			a4.Decompress(&a4)
			a17.Decompress(&a17)

			return a2.Equal(&batch[0]) && a4.Equal(&batch[1]) && a17.Equal(&batch[2])
		},
		genA,
	))

	properties.Property("[BLS12-378] Frobenius of x in E12 should be equal to x^q", prop.ForAll(
		func(a *E12) bool {
			var b, c E12
			q := fp.Modulus()
			b.Frobenius(a)
			c.Exp(a, *q)
			return c.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] FrobeniusSquare of x in E12 should be equal to x^(q^2)", prop.ForAll(
		func(a *E12) bool {
			var b, c E12
			q := fp.Modulus()
			b.FrobeniusSquare(a)
			c.Exp(a, *q).Exp(&c, *q)
			return c.Equal(&b)
		},
		genA,
	))

	properties.Property("[BLS12-378] FrobeniusCube of x in E12 should be equal to x^(q^3)", prop.ForAll(
		func(a *E12) bool {
			var b, c E12
			q := fp.Modulus()
			b.FrobeniusCube(a)
			c.Exp(a, *q).Exp(&c, *q).Exp(&c, *q)
			return c.Equal(&b)
		},
		genA,
	))

	properties.TestingRun(t, gopter.ConsoleReporter(false))

}

// ------------------------------------------------------------
// benches

func BenchmarkE12Add(b *testing.B) {
	var a, c E12
	a.SetRandom()
	c.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Add(&a, &c)
	}
}

func BenchmarkE12Sub(b *testing.B) {
	var a, c E12
	a.SetRandom()
	c.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Sub(&a, &c)
	}
}

func BenchmarkE12Mul(b *testing.B) {
	var a, c E12
	a.SetRandom()
	c.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Mul(&a, &c)
	}
}

func BenchmarkE12Cyclosquare(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.CyclotomicSquare(&a)
	}
}

func BenchmarkE12Square(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Square(&a)
	}
}

func BenchmarkE12Inverse(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Inverse(&a)
	}
}

func BenchmarkE12Conjugate(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Conjugate(&a)
	}
}

func BenchmarkE12Frobenius(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Frobenius(&a)
	}
}

func BenchmarkE12FrobeniusSquare(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.FrobeniusSquare(&a)
	}
}

func BenchmarkE12FrobeniusCube(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.FrobeniusCube(&a)
	}
}

func BenchmarkE12Expt(b *testing.B) {
	var a E12
	a.SetRandom()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		a.Expt(&a)
	}
}
// ===== file: ecc/bls12-378/internal/fptower/e2.go =====

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by consensys/gnark-crypto DO NOT EDIT

package fptower

import (
	"github.com/consensys/gnark-crypto/ecc/bls12-378/fp"
	"math/big"
)

// E2 is a degree two finite field extension of fp.Element
type E2 struct {
	A0, A1 fp.Element
}

// Equal returns true if z equals x, false otherwise
func (z *E2) Equal(x *E2) bool {
	return z.A0.Equal(&x.A0) && z.A1.Equal(&x.A1)
}

// Cmp compares (lexicographic order) z and x and returns:
//
//	-1 if z < x
//	 0 if z == x
//	+1 if z > x
//
func (z *E2) Cmp(x *E2) int {
	if a1 := z.A1.Cmp(&x.A1); a1 != 0 {
		return a1
	}
	return z.A0.Cmp(&x.A0)
}

// LexicographicallyLargest returns true if this element is strictly lexicographically
// larger than its negation, false otherwise
func (z *E2) LexicographicallyLargest() bool {
	// adapted from github.com/zkcrypto/bls12_381
	if z.A1.IsZero() {
		return z.A0.LexicographicallyLargest()
	}
	return z.A1.LexicographicallyLargest()
}

// SetString sets a E2 element from strings
func (z *E2) SetString(s1, s2 string) *E2 {
	z.A0.SetString(s1)
	z.A1.SetString(s2)
	return z
}

// SetZero sets an E2 element to zero
func (z *E2) SetZero() *E2 {
	z.A0.SetZero()
	z.A1.SetZero()
	return z
}

// Set sets an E2 from x
func (z *E2) Set(x *E2) *E2 {
	z.A0 = x.A0
	z.A1 = x.A1
	return z
}

// SetOne sets z to 1 in Montgomery form and returns z
func (z *E2) SetOne() *E2 {
	z.A0.SetOne()
	z.A1.SetZero()
	return z
}

// SetRandom sets a0 and a1 to random values
func (z *E2) SetRandom() (*E2, error) {
	if _, err := z.A0.SetRandom(); err != nil {
		return nil, err
	}
	if _, err := z.A1.SetRandom(); err != nil {
		return nil, err
	}
	return z, nil
}

// IsZero returns true if z is the zero element, false otherwise
func (z *E2) IsZero() bool {
	return z.A0.IsZero() && z.A1.IsZero()
}

// Add adds two elements of E2
func (z *E2) Add(x, y *E2) *E2 {
	addE2(z, x, y)
	return z
}

// Sub two elements of E2
func (z *E2) Sub(x, y *E2) *E2 {
	subE2(z, x, y)
	return z
}

// Double doubles an E2 element
func (z *E2) Double(x *E2) *E2 {
	doubleE2(z, x)
	return z
}

// Neg negates an E2 element
func (z *E2) Neg(x *E2) *E2 {
	negE2(z, x)
	return z
}

// String implements Stringer interface for fancy printing
func (z *E2) String() string {
	return (z.A0.String() + "+" + z.A1.String() + "*u")
}

// ToMont converts to mont form
func (z *E2) ToMont() *E2 {
	z.A0.ToMont()
	z.A1.ToMont()
	return z
}

// FromMont converts from mont form
func (z *E2) FromMont() *E2 {
	z.A0.FromMont()
	z.A1.FromMont()
	return z
}

// MulByElement multiplies an element in E2 by an element in fp
func (z *E2) MulByElement(x *E2, y *fp.Element) *E2 {
	// copy y first so the call is safe when y aliases z.A0 or z.A1
	var yCopy fp.Element
	yCopy.Set(y)
	z.A0.Mul(&x.A0, &yCopy)
	z.A1.Mul(&x.A1, &yCopy)
	return z
}

// Conjugate conjugates an element in E2
func (z *E2) Conjugate(x *E2) *E2 {
	z.A0 = x.A0
	z.A1.Neg(&x.A1)
	return z
}

// Halve sets z = z / 2
func (z *E2) Halve() {
	z.A0.Halve()
	z.A1.Halve()
}

// Legendre returns the Legendre symbol of z
func (z *E2) Legendre() int {
	var n fp.Element
	z.norm(&n)
	return n.Legendre()
}

// Exp sets z=x**e and returns it
func (z *E2) Exp(x E2, exponent *big.Int) *E2 {
	// left-to-right binary exponentiation
	z.SetOne()
	b := exponent.Bytes()
	for i := 0; i < len(b); i++ {
		w := b[i]
		for j := 0; j < 8; j++ {
			z.Square(z)
			if (w & (0b10000000 >> j)) != 0 {
				z.Mul(z, &x)
			}
		}
	}

	return z
}

// Sqrt sets z to a square root of x and returns z
// The function does not test whether the square root
// exists or not, it's up to the caller to call
// Legendre beforehand.
// cf https://eprint.iacr.org/2012/685.pdf (algo 10)
func (z *E2) Sqrt(x *E2) *E2 {

	// precomputation
	var b, c, d, e, f, x0 E2
	var _b, o fp.Element

	// c must be a non square; here c = u
	// NOTE(review): the original comment claimed "p=1 mod 12, only bls377
	// has such a p currently" — inherited from the bls12-377 generator;
	// confirm the congruence claim for the BLS12-378 modulus.
	c.A1.SetOne()

	q := fp.Modulus()
	var exp, one big.Int
	one.SetUint64(1)
	exp.Set(q).Sub(&exp, &one).Rsh(&exp, 1) // (q-1)/2
	d.Exp(c, &exp)
	e.Mul(&d, &c).Inverse(&e)
	f.Mul(&d, &c).Square(&f)

	// computation
	exp.Rsh(&exp, 1) // (q-1)/4
	b.Exp(*x, &exp)
	b.norm(&_b)
	o.SetOne()
	if _b.Equal(&o) {
		x0.Square(&b).Mul(&x0, x)
		_b.Set(&x0.A0).Sqrt(&_b)
		z.Conjugate(&b).MulByElement(z, &_b)
		return z
	}
	x0.Square(&b).Mul(&x0, x).Mul(&x0, &f)
	_b.Set(&x0.A0).Sqrt(&_b)
	z.Conjugate(&b).MulByElement(z, &_b).Mul(z, &e)

	return z
}

// BatchInvert returns a new slice with every element inverted.
// Uses Montgomery batch inversion trick
func BatchInvert(a []E2) []E2 {
	res := make([]E2, len(a))
	if len(a) == 0 {
		return res
	}

	zeroes := make([]bool, len(a))
	var accumulator E2
	accumulator.SetOne()

	// forward pass: res[i] holds the product of all non-zero a[j], j < i
	for i := 0; i < len(a); i++ {
		if a[i].IsZero() {
			zeroes[i] = true
			continue
		}
		res[i].Set(&accumulator)
		accumulator.Mul(&accumulator, &a[i])
	}

	// one inversion for the whole batch
	accumulator.Inverse(&accumulator)

	// backward pass: peel off one factor per element
	for i := len(a) - 1; i >= 0; i-- {
		if zeroes[i] {
			continue
		}
		res[i].Mul(&res[i], &accumulator)
		accumulator.Mul(&accumulator, &a[i])
	}

	return res
}

// ===== file: ecc/bls12-378/internal/fptower/e2_amd64.go =====

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by consensys/gnark-crypto DO NOT EDIT

package fptower

// q (modulus)
var qE2 = [6]uint64{
	11045256207009841153,
	14886639130118979584,
	10956628289047010687,
	9513184293603517222,
	6038022134869067682,
	283357621510263184,
}

// q'[0], see Montgomery multiplication algorithm
var (
	qE2Inv0 uint64 = 11045256207009841151
	_              = qE2Inv0 // used in asm
)

//go:noescape
func addE2(res, x, y *E2)

//go:noescape
func subE2(res, x, y *E2)

//go:noescape
func doubleE2(res, x *E2)

//go:noescape
func negE2(res, x *E2)

// ===== file: ecc/bls12-378/internal/fptower/e2_amd64.s =====

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "textflag.h"
#include "funcdata.h"

// modulus q
DATA q<>+0(SB)/8, $0x9948a20000000001
DATA q<>+8(SB)/8, $0xce97f76a822c0000
DATA q<>+16(SB)/8, $0x980dc360d0a49d7f
DATA q<>+24(SB)/8, $0x84059eb647102326
DATA q<>+32(SB)/8, $0x53cb5d240ed107a2
DATA q<>+40(SB)/8, $0x03eeb0416684d190
GLOBL q<>(SB), (RODATA+NOPTR), $48

// qInv0 q'[0]
DATA qInv0<>(SB)/8, $0x9948a1ffffffffff
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8

// REDUCE performs a conditional subtraction of q: ra is reduced mod q,
// rb is clobbered as scratch holding the pre-subtraction value.
#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \
	MOVQ    ra0, rb0;        \
	SUBQ    q<>(SB), ra0;    \
	MOVQ    ra1, rb1;        \
	SBBQ    q<>+8(SB), ra1;  \
	MOVQ    ra2, rb2;        \
	SBBQ    q<>+16(SB), ra2; \
	MOVQ    ra3, rb3;        \
	SBBQ    q<>+24(SB), ra3; \
	MOVQ    ra4, rb4;        \
	SBBQ    q<>+32(SB), ra4; \
	MOVQ    ra5, rb5;        \
	SBBQ    q<>+40(SB), ra5; \
	CMOVQCS rb0, ra0;        \
	CMOVQCS rb1, ra1;        \
	CMOVQCS rb2, ra2;        \
	CMOVQCS rb3, ra3;        \
	CMOVQCS rb4, ra4;        \
	CMOVQCS rb5, ra5;        \

// NOTE(review): addE2 declares a $0 frame but the REDUCE temp list includes
// the stack slot s0-8(SP) — confirm the generator guarantees this slot is
// valid with NOSPLIT and frame size 0.
TEXT ·addE2(SB), NOSPLIT, $0-24
	MOVQ x+8(FP), AX
	MOVQ 0(AX), BX
	MOVQ 8(AX), SI
	MOVQ 16(AX), DI
	MOVQ 24(AX), R8
	MOVQ 32(AX), R9
	MOVQ 40(AX), R10
	MOVQ y+16(FP), DX
	ADDQ 0(DX), BX
	ADCQ 8(DX), SI
	ADCQ 16(DX), DI
	ADCQ 24(DX), R8
	ADCQ 32(DX), R9
	ADCQ 40(DX), R10

	// reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,s0-8(SP))
	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP))

	MOVQ res+0(FP), CX
	MOVQ BX, 0(CX)
	MOVQ SI, 8(CX)
	MOVQ DI, 16(CX)
	MOVQ R8, 24(CX)
	MOVQ R9, 32(CX)
	MOVQ R10, 40(CX)
	MOVQ 48(AX), BX
	MOVQ 56(AX), SI
	MOVQ 64(AX), DI
	MOVQ 72(AX), R8
	MOVQ 80(AX), R9
	MOVQ 88(AX), R10
	ADDQ 48(DX), BX
	ADCQ 56(DX), SI
	ADCQ 64(DX), DI
	ADCQ 72(DX), R8
	ADCQ 80(DX), R9
	ADCQ 88(DX), R10

	// reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,s0-8(SP))
	REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP))

	MOVQ BX, 48(CX)
	MOVQ SI, 56(CX)
	MOVQ DI, 64(CX)
	MOVQ R8, 72(CX)
	MOVQ R9, 80(CX)
	MOVQ R10, 88(CX)
	RET

TEXT ·doubleE2(SB), NOSPLIT, $0-16
	MOVQ res+0(FP), DX
	MOVQ x+8(FP), AX
	MOVQ 0(AX), CX
	MOVQ 8(AX), BX
	MOVQ 16(AX), SI
	MOVQ 24(AX), DI
	MOVQ 32(AX), R8
	MOVQ 40(AX), R9
	ADDQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8
	ADCQ R9, R9

	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)

	MOVQ CX, 0(DX)
	MOVQ BX, 8(DX)
	MOVQ SI, 16(DX)
	MOVQ DI, 24(DX)
	MOVQ R8, 32(DX)
	MOVQ R9, 40(DX)
	MOVQ 48(AX), CX
	MOVQ 56(AX), BX
	MOVQ 64(AX), SI
	MOVQ 72(AX), DI
	MOVQ 80(AX), R8
	MOVQ 88(AX), R9
	ADDQ CX, CX
	ADCQ BX, BX
	ADCQ SI, SI
	ADCQ DI, DI
	ADCQ R8, R8
	ADCQ R9, R9

	// reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15)
	REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15)

	MOVQ CX, 48(DX)
	MOVQ BX, 56(DX)
	MOVQ SI, 64(DX)
	MOVQ DI, 72(DX)
	MOVQ R8, 80(DX)
	MOVQ R9, 88(DX)
	RET

TEXT ·subE2(SB), NOSPLIT, $0-24
	XORQ R9, R9
	MOVQ x+8(FP), R8
	MOVQ 0(R8), AX
	MOVQ 8(R8), DX
	MOVQ 16(R8), CX
	MOVQ 24(R8), BX
	MOVQ 32(R8), SI
	MOVQ 40(R8), DI
	MOVQ y+16(FP), R8
	SUBQ 0(R8), AX
	SBBQ 8(R8), DX
	SBBQ 16(R8), CX
	SBBQ 24(R8), BX
	SBBQ 32(R8), SI
	SBBQ 40(R8), DI
	MOVQ x+8(FP), R8

	// add q back if the subtraction borrowed (CMOVQCC zeroes q otherwise)
	MOVQ $0x9948a20000000001, R10
	MOVQ $0xce97f76a822c0000, R11
	MOVQ $0x980dc360d0a49d7f, R12
	MOVQ $0x84059eb647102326, R13
	MOVQ $0x53cb5d240ed107a2, R14
	MOVQ $0x03eeb0416684d190, R15
	CMOVQCC R9, R10
	CMOVQCC R9, R11
	CMOVQCC R9, R12
	CMOVQCC R9, R13
	CMOVQCC R9, R14
	CMOVQCC R9, R15
	ADDQ R10, AX
	ADCQ R11, DX
	ADCQ R12, CX
	ADCQ R13, BX
	ADCQ R14, SI
	ADCQ R15, DI
	MOVQ res+0(FP), R10
	MOVQ AX, 0(R10)
	MOVQ DX, 8(R10)
	MOVQ CX, 16(R10)
	MOVQ BX, 24(R10)
	MOVQ SI, 32(R10)
	MOVQ DI, 40(R10)
	MOVQ 48(R8), AX
	MOVQ 56(R8), DX
	MOVQ 64(R8), CX
	MOVQ 72(R8), BX
	MOVQ 80(R8), SI
	MOVQ 88(R8), DI
	MOVQ y+16(FP), R8
	SUBQ 48(R8), AX
	SBBQ 56(R8), DX
	SBBQ 64(R8), CX
	SBBQ 72(R8), BX
	SBBQ 80(R8), SI
	SBBQ 88(R8), DI
	MOVQ $0x9948a20000000001, R11
	MOVQ $0xce97f76a822c0000, R12
	MOVQ $0x980dc360d0a49d7f, R13
	MOVQ $0x84059eb647102326, R14
	MOVQ $0x53cb5d240ed107a2, R15
	MOVQ $0x03eeb0416684d190, R10
	CMOVQCC R9, R11
	CMOVQCC R9, R12
	CMOVQCC R9, R13
	CMOVQCC R9, R14
	CMOVQCC R9, R15
	CMOVQCC R9, R10
	ADDQ R11, AX
	ADCQ R12, DX
	ADCQ R13, CX
	ADCQ R14, BX
	ADCQ R15, SI
	ADCQ R10, DI
	MOVQ res+0(FP), R8
	MOVQ AX, 48(R8)
	MOVQ DX, 56(R8)
	MOVQ CX, 64(R8)
	MOVQ BX, 72(R8)
	MOVQ SI, 80(R8)
	MOVQ DI, 88(R8)
	RET

TEXT ·negE2(SB), NOSPLIT, $0-16
	MOVQ res+0(FP), DX
	MOVQ x+8(FP), AX
	MOVQ 0(AX), BX
	MOVQ 8(AX), SI
	MOVQ 16(AX), DI
	MOVQ 24(AX), R8
	MOVQ 32(AX), R9
	MOVQ 40(AX), R10

	// if x.A0 == 0, the negation is 0 (not q)
	MOVQ BX, AX
	ORQ SI, AX
	ORQ DI, AX
	ORQ R8, AX
	ORQ R9, AX
	ORQ R10, AX
	TESTQ AX, AX
	JNE l1
	MOVQ AX, 0(DX)
	MOVQ AX, 8(DX)
	MOVQ AX, 16(DX)
	MOVQ AX, 24(DX)
	MOVQ AX, 32(DX)
	MOVQ AX, 40(DX)
	JMP l3

l1:
	// res.A0 = q - x.A0
	MOVQ $0x9948a20000000001, CX
	SUBQ BX, CX
	MOVQ CX, 0(DX)
	MOVQ $0xce97f76a822c0000, CX
	SBBQ SI, CX
	MOVQ CX, 8(DX)
	MOVQ $0x980dc360d0a49d7f, CX
	SBBQ DI, CX
	MOVQ CX, 16(DX)
	MOVQ $0x84059eb647102326, CX
	SBBQ R8, CX
	MOVQ CX, 24(DX)
	MOVQ $0x53cb5d240ed107a2, CX
	SBBQ R9, CX
	MOVQ CX, 32(DX)
	MOVQ $0x03eeb0416684d190, CX
	SBBQ R10, CX
	MOVQ CX, 40(DX)

l3:
	MOVQ x+8(FP), AX
	MOVQ 48(AX), BX
	MOVQ 56(AX), SI
	MOVQ 64(AX), DI
	MOVQ 72(AX), R8
	MOVQ 80(AX), R9
	MOVQ 88(AX), R10

	// if x.A1 == 0, the negation is 0 (not q)
	MOVQ BX, AX
	ORQ SI, AX
	ORQ DI, AX
	ORQ R8, AX
	ORQ R9, AX
	ORQ R10, AX
	TESTQ AX, AX
	JNE l2
	MOVQ AX, 48(DX)
	MOVQ AX, 56(DX)
	MOVQ AX, 64(DX)
	MOVQ AX, 72(DX)
	MOVQ AX, 80(DX)
	MOVQ AX, 88(DX)
	RET

l2:
	// res.A1 = q - x.A1
	MOVQ $0x9948a20000000001, CX
	SUBQ BX, CX
	MOVQ CX, 48(DX)
	MOVQ $0xce97f76a822c0000, CX
	SBBQ SI, CX
	MOVQ CX, 56(DX)
	MOVQ $0x980dc360d0a49d7f, CX
	SBBQ DI, CX
	MOVQ CX, 64(DX)
	MOVQ $0x84059eb647102326, CX
	SBBQ R8, CX
	MOVQ CX, 72(DX)
	MOVQ $0x53cb5d240ed107a2, CX
	SBBQ R9, CX
	MOVQ CX, 80(DX)
	MOVQ $0x03eeb0416684d190, CX
	SBBQ R10, CX
	MOVQ CX, 88(DX)
	RET

// ===== file: ecc/bls12-378/internal/fptower/e2_bls378.go =====

// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package fptower

import "github.com/consensys/gnark-crypto/ecc/bls12-378/fp"

// Mul sets z to the E2-product of x,y, returns z
// Karatsuba over Fp with non-residue u^2 = -5.
func (z *E2) Mul(x, y *E2) *E2 {
	var a, b, c fp.Element
	a.Add(&x.A0, &x.A1)
	b.Add(&y.A0, &y.A1)
	a.Mul(&a, &b)
	b.Mul(&x.A0, &y.A0)
	c.Mul(&x.A1, &y.A1)
	z.A1.Sub(&a, &b).Sub(&z.A1, &c)
	fp.MulBy5(&c)
	z.A0.Sub(&b, &c)
	return z
}

// Square sets z to the E2-product of x,x returns z
func (z *E2) Square(x *E2) *E2 {
	//algo 22 https://eprint.iacr.org/2010/354.pdf
	var c0, c2 fp.Element
	c0.Add(&x.A0, &x.A1)
	c2.Neg(&x.A1)
	fp.MulBy5(&c2)
	c2.Add(&c2, &x.A0)

	c0.Mul(&c0, &c2) // (x1+x2)*(x1+(u**2)x2)
	c2.Mul(&x.A0, &x.A1).Double(&c2)
	z.A1 = c2
	c2.Double(&c2)
	z.A0.Add(&c0, &c2)

	return z
}

// MulByNonResidue multiplies a E2 by (0,1)
func (z *E2) MulByNonResidue(x *E2) *E2 {
	a := x.A0
	b := x.A1 // fetching x.A1 in the function below is slower
	fp.MulBy5(&b)
	z.A0.Neg(&b)
	z.A1 = a
	return z
}

// MulByNonResidueInv multiplies a E2 by (0,1)^{-1}
func (z *E2) MulByNonResidueInv(x *E2) *E2 {
	//z.A1.MulByNonResidueInv(&x.A0)
	a := x.A1
	// 5^{-1} mod q, in Montgomery form
	fiveinv := fp.Element{
		4714375566610504077,
		585136512338283717,
		16899133777167898908,
		1882388787078723660,
		12465292654455594957,
		119042783712594200,
	}
	z.A1.Mul(&x.A0, &fiveinv).Neg(&z.A1)
	z.A0 = a
	return z
}

// Inverse sets z to the E2-inverse of x, returns z
func (z *E2) Inverse(x *E2) *E2 {
	// Algorithm 8 from https://eprint.iacr.org/2010/354.pdf
	//var a, b, t0, t1, tmp fp.Element
	var t0, t1, tmp fp.Element
	a := &x.A0 // creating the buffers a, b is faster than querying &x.A0, &x.A1 in the functions call below
	b := &x.A1
	t0.Square(a)
	t1.Square(b)
	tmp.Set(&t1)
	fp.MulBy5(&tmp)
	t0.Add(&t0, &tmp) // norm(x) = a^2 + 5*b^2
	t1.Inverse(&t0)
	z.A0.Mul(a, &t1)
	z.A1.Mul(b, &t1).Neg(&z.A1)

	return z
}

// norm sets x to the norm of z
func (z *E2) norm(x *fp.Element) {
	var tmp fp.Element
	x.Square(&z.A1)
	tmp.Set(x)
	fp.MulBy5(&tmp)
	x.Square(&z.A0).Add(x, &tmp)
}

// ===== file: ecc/bls12-378/internal/fptower/e2_fallback.go =====

//go:build !amd64
// +build !amd64

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Pure-Go fallbacks for the E2 group operations that have an assembly fast
// path on amd64 (see e2_amd64.s); this file is built only when the target
// is not amd64 (per the //go:build !amd64 constraint above).

// addE2 sets z = x + y, component-wise.
func addE2(z, x, y *E2) {
	z.A0.Add(&x.A0, &y.A0)
	z.A1.Add(&x.A1, &y.A1)
}

// subE2 sets z = x - y, component-wise.
func subE2(z, x, y *E2) {
	z.A0.Sub(&x.A0, &y.A0)
	z.A1.Sub(&x.A1, &y.A1)
}

// doubleE2 sets z = 2*x, component-wise.
func doubleE2(z, x *E2) {
	z.A0.Double(&x.A0)
	z.A1.Double(&x.A1)
}

// negE2 sets z = -x, component-wise.
func negE2(z, x *E2) {
	z.A0.Neg(&x.A0)
	z.A1.Neg(&x.A1)
}
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "crypto/rand" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE2ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE2() + genB := GenE2() + genfp := GenFp() + + properties.Property("[BLS12-378] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (neg) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Neg(a) + a.Neg(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a 
*E2) bool { + var b E2 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by non residue) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.MulByNonResidue(a) + a.MulByNonResidue(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by non residue inverse) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.MulByNonResidueInv(a) + a.MulByNonResidueInv(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by element) should output the same result", prop.ForAll( + func(a *E2, b fp.Element) bool { + var c E2 + c.MulByElement(a, &b) + a.MulByElement(a, &b) + return a.Equal(&c) + }, + genA, + genfp, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Sqrt) should output the same result", prop.ForAll( + func(a *E2) bool { + var b, c, d, s E2 + + s.Square(a) + a.Set(&s) + b.Set(&s) + + a.Sqrt(a) + b.Sqrt(&b) + + c.Square(a) + d.Square(&b) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestE2MulMaxed(t *testing.T) { + // let's pick a and b, with maxed A0 and A1 + var a, b E2 + fpMaxValue := fp.Element{ + 11045256207009841153, + 
14886639130118979584, + 10956628289047010687, + 9513184293603517222, + 6038022134869067682, + 283357621510263184, + } + fpMaxValue[0]-- + + a.A0 = fpMaxValue + a.A1 = fpMaxValue + b.A0 = fpMaxValue + b.A1 = fpMaxValue + + var c, d E2 + d.Inverse(&b) + c.Set(&a) + c.Mul(&c, &b).Mul(&c, &d) + if !c.Equal(&a) { + t.Fatal("mul with max fp failed") + } +} + +func TestE2Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE2() + genB := GenE2() + genfp := GenFp() + + properties.Property("[BLS12-378] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E2) bool { + var c E2 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] BatchInvert should output the same result as Inverse", prop.ForAll( + func(a, b, c *E2) bool { + + batch := BatchInvert([]E2{*a, *b, *c}) + a.Inverse(a) + b.Inverse(b) + c.Inverse(c) + return a.Equal(&batch[0]) && b.Equal(&batch[1]) && c.Equal(&batch[2]) + }, + genA, + genA, + genA, + )) + + properties.Property("[BLS12-378] inverse twice should leave an element invariant", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] neg twice should leave an element invariant", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Neg(a).Neg(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] square and mul should output the same result", prop.ForAll( + func(a *E2) bool { + var b, c E2 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] MulByElement 
MulByElement inverse should leave an element invariant", prop.ForAll( + func(a *E2, b fp.Element) bool { + var c E2 + var d fp.Element + d.Inverse(&b) + c.MulByElement(a, &b).MulByElement(&c, &d) + return c.Equal(a) + }, + genA, + genfp, + )) + + properties.Property("[BLS12-378] Double and mul by 2 should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + var c fp.Element + c.SetUint64(2) + b.Double(a) + a.MulByElement(a, &c) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Mulbynonres mulbynonresinv should leave the element invariant", prop.ForAll( + func(a *E2) bool { + var b E2 + b.MulByNonResidue(a).MulByNonResidueInv(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] a + pi(a), a-pi(a) should be real", prop.ForAll( + func(a *E2) bool { + var b, c, d E2 + var e, f fp.Element + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.A0) + f.Double(&a.A1) + return c.A1.IsZero() && d.A0.IsZero() && e.Equal(&c.A0) && f.Equal(&d.A1) + }, + genA, + )) + + properties.Property("[BLS12-378] Legendre on square should output 1", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Square(a) + c := b.Legendre() + return c == 1 + }, + genA, + )) + + properties.Property("[BLS12-378] square(sqrt) should leave an element invariant", prop.ForAll( + func(a *E2) bool { + var b, c, d, e E2 + b.Square(a) + c.Sqrt(&b) + d.Square(&c) + e.Neg(a) + return (c.Equal(a) || c.Equal(&e)) && d.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] neg(E2) == neg(E2.A0, E2.A1)", prop.ForAll( + func(a *E2) bool { + var b, c E2 + b.Neg(a) + c.A0.Neg(&a.A0) + c.A1.Neg(&a.A1) + return c.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Cmp and LexicographicallyLargest should be consistant", prop.ForAll( + func(a *E2) bool { + var negA E2 + negA.Neg(a) + cmpResult := a.Cmp(&negA) + lResult := a.LexicographicallyLargest() + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult 
!= 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE2Add(b *testing.B) { + var a, c E2 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE2Sub(b *testing.B) { + var a, c E2 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE2Mul(b *testing.B) { + var a, c E2 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE2MulByElement(b *testing.B) { + var a E2 + var c fp.Element + c.SetRandom() + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByElement(&a, &c) + } +} + +func BenchmarkE2Square(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE2Sqrt(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sqrt(&a) + } +} + +func BenchmarkE2Exp(b *testing.B) { + var x E2 + x.SetRandom() + b1, _ := rand.Int(rand.Reader, fp.Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + x.Exp(x, b1) + } +} + +func BenchmarkE2Inverse(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } +} + +func BenchmarkE2MulNonRes(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByNonResidue(&a) + } +} + +func BenchmarkE2MulNonResInv(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByNonResidueInv(&a) + } +} + +func BenchmarkE2Conjugate(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < 
// Equal returns true if z equals x, false otherwise
func (z *E6) Equal(x *E6) bool {
	return z.B0.Equal(&x.B0) && z.B1.Equal(&x.B1) && z.B2.Equal(&x.B2)
}
*E6) ToMont() *E6 { + z.B0.ToMont() + z.B1.ToMont() + z.B2.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E6) FromMont() *E6 { + z.B0.FromMont() + z.B1.FromMont() + z.B2.FromMont() + return z +} + +// Add adds two elements of E6 +func (z *E6) Add(x, y *E6) *E6 { + z.B0.Add(&x.B0, &y.B0) + z.B1.Add(&x.B1, &y.B1) + z.B2.Add(&x.B2, &y.B2) + return z +} + +// Neg negates the E6 number +func (z *E6) Neg(x *E6) *E6 { + z.B0.Neg(&x.B0) + z.B1.Neg(&x.B1) + z.B2.Neg(&x.B2) + return z +} + +// Sub two elements of E6 +func (z *E6) Sub(x, y *E6) *E6 { + z.B0.Sub(&x.B0, &y.B0) + z.B1.Sub(&x.B1, &y.B1) + z.B2.Sub(&x.B2, &y.B2) + return z +} + +// Double doubles an element in E6 +func (z *E6) Double(x *E6) *E6 { + z.B0.Double(&x.B0) + z.B1.Double(&x.B1) + z.B2.Double(&x.B2) + return z +} + +// String puts E6 elmt in string form +func (z *E6) String() string { + return (z.B0.String() + "+(" + z.B1.String() + ")*v+(" + z.B2.String() + ")*v**2") +} + +// MulByNonResidue mul x by (0,1,0) +func (z *E6) MulByNonResidue(x *E6) *E6 { + z.B2, z.B1, z.B0 = x.B1, x.B0, x.B2 + z.B0.MulByNonResidue(&z.B0) + return z +} + +// MulByE2 multiplies an element in E6 by an element in E2 +func (z *E6) MulByE2(x *E6, y *E2) *E6 { + var yCopy E2 + yCopy.Set(y) + z.B0.Mul(&x.B0, &yCopy) + z.B1.Mul(&x.B1, &yCopy) + z.B2.Mul(&x.B2, &yCopy) + return z +} + +// MulBy01 multiplication by sparse element (c0,c1,0) +func (z *E6) MulBy01(c0, c1 *E2) *E6 { + + var a, b, tmp, t0, t1, t2 E2 + + a.Mul(&z.B0, c0) + b.Mul(&z.B1, c1) + + tmp.Add(&z.B1, &z.B2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + t0.Add(&t0, &a) + + tmp.Add(&z.B0, &z.B2) + t2.Mul(c0, &tmp) + t2.Sub(&t2, &a) + t2.Add(&t2, &b) + + t1.Add(c0, c1) + tmp.Add(&z.B0, &z.B1) + t1.Mul(&t1, &tmp) + t1.Sub(&t1, &a) + t1.Sub(&t1, &b) + + z.B0.Set(&t0) + z.B1.Set(&t1) + z.B2.Set(&t2) + + return z +} + +// MulBy1 multiplication of E6 by sparse element (0, c1, 0) +func (z *E6) MulBy1(c1 *E2) *E6 { + + var b, tmp, 
t0, t1 E2 + b.Mul(&z.B1, c1) + + tmp.Add(&z.B1, &z.B2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + + tmp.Add(&z.B0, &z.B1) + t1.Mul(c1, &tmp) + t1.Sub(&t1, &b) + + z.B0.Set(&t0) + z.B1.Set(&t1) + z.B2.Set(&b) + + return z +} + +// Mul sets z to the E6 product of x,y, returns z +func (z *E6) Mul(x, y *E6) *E6 { + // Algorithm 13 from https://eprint.iacr.org/2010/354.pdf + var t0, t1, t2, c0, c1, c2, tmp E2 + t0.Mul(&x.B0, &y.B0) + t1.Mul(&x.B1, &y.B1) + t2.Mul(&x.B2, &y.B2) + + c0.Add(&x.B1, &x.B2) + tmp.Add(&y.B1, &y.B2) + c0.Mul(&c0, &tmp).Sub(&c0, &t1).Sub(&c0, &t2).MulByNonResidue(&c0).Add(&c0, &t0) + + c1.Add(&x.B0, &x.B1) + tmp.Add(&y.B0, &y.B1) + c1.Mul(&c1, &tmp).Sub(&c1, &t0).Sub(&c1, &t1) + tmp.MulByNonResidue(&t2) + c1.Add(&c1, &tmp) + + tmp.Add(&x.B0, &x.B2) + c2.Add(&y.B0, &y.B2).Mul(&c2, &tmp).Sub(&c2, &t0).Sub(&c2, &t2).Add(&c2, &t1) + + z.B0.Set(&c0) + z.B1.Set(&c1) + z.B2.Set(&c2) + + return z +} + +// Square sets z to the E6 product of x,x, returns z +func (z *E6) Square(x *E6) *E6 { + + // Algorithm 16 from https://eprint.iacr.org/2010/354.pdf + var c4, c5, c1, c2, c3, c0 E2 + c4.Mul(&x.B0, &x.B1).Double(&c4) + c5.Square(&x.B2) + c1.MulByNonResidue(&c5).Add(&c1, &c4) + c2.Sub(&c4, &c5) + c3.Square(&x.B0) + c4.Sub(&x.B0, &x.B1).Add(&c4, &x.B2) + c5.Mul(&x.B1, &x.B2).Double(&c5) + c4.Square(&c4) + c0.MulByNonResidue(&c5).Add(&c0, &c3) + z.B2.Add(&c2, &c4).Add(&z.B2, &c5).Sub(&z.B2, &c3) + z.B0.Set(&c0) + z.B1.Set(&c1) + + return z +} + +// Inverse an element in E6 +func (z *E6) Inverse(x *E6) *E6 { + // Algorithm 17 from https://eprint.iacr.org/2010/354.pdf + // step 9 is wrong in the paper it's t1-t4 + var t0, t1, t2, t3, t4, t5, t6, c0, c1, c2, d1, d2 E2 + t0.Square(&x.B0) + t1.Square(&x.B1) + t2.Square(&x.B2) + t3.Mul(&x.B0, &x.B1) + t4.Mul(&x.B0, &x.B2) + t5.Mul(&x.B1, &x.B2) + c0.MulByNonResidue(&t5).Neg(&c0).Add(&c0, &t0) + c1.MulByNonResidue(&t2).Sub(&c1, &t3) + c2.Sub(&t1, &t4) + t6.Mul(&x.B0, &c0) + d1.Mul(&x.B2, &c1) + 
d2.Mul(&x.B1, &c2) + d1.Add(&d1, &d2).MulByNonResidue(&d1) + t6.Add(&t6, &d1) + t6.Inverse(&t6) + z.B0.Mul(&c0, &t6) + z.B1.Mul(&c1, &t6) + z.B2.Mul(&c2, &t6) + + return z +} diff --git a/ecc/bls12-378/internal/fptower/e6_test.go b/ecc/bls12-378/internal/fptower/e6_test.go new file mode 100644 index 000000000..b6d418d30 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e6_test.go @@ -0,0 +1,317 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE6ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + genE2 := GenE2() + + properties.Property("[BLS12-378] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (neg) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Neg(a) + a.Neg(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Double(a) + a.Double(a) + return a.Equal(&b) + 
}, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by non residue) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.MulByNonResidue(a) + a.MulByNonResidue(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by E2) should output the same result", prop.ForAll( + func(a *E6, b *E2) bool { + var c E6 + c.MulByE2(a, b) + a.MulByE2(a, b) + return a.Equal(&c) + }, + genA, + genE2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE6Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + genE2 := GenE2() + + properties.Property("[BLS12-378] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c E6 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] inverse twice should leave an element invariant", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] neg twice should leave an element invariant", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Neg(a).Neg(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] square and mul should output the same result", prop.ForAll( + func(a *E6) bool { + var b, c E6 + 
b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] Double and add twice should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Add(a, a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Mul by non residue should be the same as multiplying by (0,1,0)", prop.ForAll( + func(a *E6) bool { + var b, c E6 + b.B1.A0.SetOne() + c.Mul(a, &b) + a.MulByNonResidue(a) + return a.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] MulByE2 MulByE2 inverse should leave an element invariant", prop.ForAll( + func(a *E6, b *E2) bool { + var c E6 + var d E2 + d.Inverse(b) + c.MulByE2(a, b).MulByE2(&c, &d) + return c.Equal(a) + }, + genA, + genE2, + )) + + properties.Property("[BLS12-378] Mul and MulBy01 should output the same result", prop.ForAll( + func(a *E6, c0, c1 *E2) bool { + var b E6 + b.B0.Set(c0) + b.B1.Set(c1) + b.Mul(&b, a) + a.MulBy01(c0, c1) + return b.Equal(a) + }, + genA, + genE2, + genE2, + )) + + properties.Property("[BLS12-378] Mul and MulBy1 should output the same result", prop.ForAll( + func(a *E6, c1 *E2) bool { + var b E6 + b.B1.Set(c1) + b.Mul(&b, a) + a.MulBy1(c1) + return b.Equal(a) + }, + genA, + genE2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE6Add(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE6Sub(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE6Mul(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE6Square(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + 
// Frobenius set z to Frobenius(x), return z
func (z *E12) Frobenius(x *E12) *E12 {
	// Algorithm 28 from https://eprint.iacr.org/2010/354.pdf (beware typos!)
	var t [6]E2

	// Frobenius acts on fp2 by conjugation
	t[0].Conjugate(&x.C0.B0)
	t[1].Conjugate(&x.C0.B1)
	t[2].Conjugate(&x.C0.B2)
	t[3].Conjugate(&x.C1.B0)
	t[4].Conjugate(&x.C1.B1)
	t[5].Conjugate(&x.C1.B2)

	// scale each coefficient by its precomputed power of the non-residue,
	// (0,1)^(i*(p-1)/6); t[0] (the constant coefficient) needs no factor
	t[1].MulByNonResidue1Power2(&t[1])
	t[2].MulByNonResidue1Power4(&t[2])
	t[3].MulByNonResidue1Power1(&t[3])
	t[4].MulByNonResidue1Power3(&t[4])
	t[5].MulByNonResidue1Power5(&t[5])

	// write back only after x is fully read, so z may alias x
	z.C0.B0 = t[0]
	z.C0.B1 = t[1]
	z.C0.B2 = t[2]
	z.C1.B0 = t[3]
	z.C1.B1 = t[4]
	z.C1.B2 = t[5]

	return z
}
+ var t [6]E2 + + t[1].MulByNonResidue2Power2(&x.C0.B1) + t[2].MulByNonResidue2Power4(&x.C0.B2) + t[3].MulByNonResidue2Power1(&x.C1.B0) + t[4].MulByNonResidue2Power3(&x.C1.B1) + t[5].MulByNonResidue2Power5(&x.C1.B2) + + z.C0.B0 = x.C0.B0 + z.C0.B1 = t[1] + z.C0.B2 = t[2] + z.C1.B0 = t[3] + z.C1.B1 = t[4] + z.C1.B2 = t[5] + + return z +} + +// FrobeniusCube set z to Frobenius^3(x), return z +func (z *E12) FrobeniusCube(x *E12) *E12 { + // Algorithm 30 from https://eprint.iacr.org/2010/354.pdf (beware typos!) + var t [6]E2 + + // Frobenius^3 acts on fp2 by conjugation + t[0].Conjugate(&x.C0.B0) + t[1].Conjugate(&x.C0.B1) + t[2].Conjugate(&x.C0.B2) + t[3].Conjugate(&x.C1.B0) + t[4].Conjugate(&x.C1.B1) + t[5].Conjugate(&x.C1.B2) + + t[1].MulByNonResidue3Power2(&t[1]) + t[3].MulByNonResidue3Power1(&t[3]) + t[4].MulByNonResidue3Power3(&t[4]) + t[5].MulByNonResidue3Power5(&t[5]) + + z.C0.B0 = t[0] + z.C0.B1 = t[1] + z.C0.B2 = t[2] + z.C1.B0 = t[3] + z.C1.B1 = t[4] + z.C1.B2 = t[5] + + return z +} + +// MulByNonResidue1Power1 set z=x*(0,1)^(1*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power1(x *E2) *E2 { + b := fp.Element{ + 9424304261440581301, + 15622662318784019360, + 5704744713545767383, + 7376930514650170538, + 2328236726423359970, + 256435709676028998, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power2 set z=x*(0,1)^(2*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power2(x *E2) *E2 { + b := fp.Element{ + 1263886799460835702, + 3481310115429540252, + 1430516082310201521, + 10760454131030452261, + 15881431079209118478, + 56234068425139279, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power3 set z=x*(0,1)^(3*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power3(x *E2) *E2 { + b := fp.Element{ + 6315024805150803022, + 16048962212196301574, + 10554832649293981783, + 14109148363171599309, + 4153042273623539198, + 250647462785784749, + } + z.A0.Mul(&x.A0, &b) + 
z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power4 set z=x*(0,1)^(4*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power4(x *E2) *E2 { + b := fp.Element{ + 18229265454137549239, + 11882161740266529218, + 12635080069402934820, + 1928134709134316785, + 2524500224088382290, + 27735392882694645, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power5 set z=x*(0,1)^(5*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power5(x *E2) *E2 { + b := fp.Element{ + 7935976750720062874, + 15312939023531261798, + 15806716224795225087, + 16245402142124945993, + 7862827682069246910, + 277569374620018935, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power1 set z=x*(0,1)^(1*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power1(x *E2) *E2 { + b := fp.Element{ + 1263886799460835702, + 3481310115429540252, + 1430516082310201521, + 10760454131030452261, + 15881431079209118478, + 56234068425139279, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power2 set z=x*(0,1)^(2*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power2(x *E2) *E2 { + b := fp.Element{ + 18229265454137549239, + 11882161740266529218, + 12635080069402934820, + 1928134709134316785, + 2524500224088382290, + 27735392882694645, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power3 set z=x*(0,1)^(3*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power3(x *E2) *E2 { + b := fp.Element{ + 9563890787977003074, + 4840746681246416935, + 3714448202430192371, + 680864871707381747, + 11127835353457883110, + 254858945967818549, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power4 set z=x*(0,1)^(4*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power4(x *E2) *E2 { + b := fp.Element{ + 9781369407549005451, + 11405329014689439332, + 9526112206736809166, + 17199474236282616577, + 8603335129369500819, + 
227123553085123904, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power5 set z=x*(0,1)^(5*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power5(x *E2) *E2 { + b := fp.Element{ + 11262734826581843530, + 3004477389852450365, + 16768292293353627483, + 7585049584469200436, + 3513521910780685392, + 255622228627568539, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power1 set z=x*(0,1)^(1*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power1(x *E2) *E2 { + b := fp.Element{ + 6315024805150803022, + 16048962212196301574, + 10554832649293981783, + 14109148363171599309, + 4153042273623539198, + 250647462785784749, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power2 set z=x*(0,1)^(2*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power2(x *E2) *E2 { + b := fp.Element{ + 9563890787977003074, + 4840746681246416935, + 3714448202430192371, + 680864871707381747, + 11127835353457883110, + 254858945967818549, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power3 set z=x*(0,1)^(3*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power3(x *E2) *E2 { + b := fp.Element{ + 4730231401859038131, + 17284420991632229626, + 401795639753028903, + 13850780004141469529, + 1884979861245528483, + 32710158724478435, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power5 set z=x*(0,1)^(5*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power5(x *E2) *E2 { + b := fp.Element{ + 6315024805150803022, + 16048962212196301574, + 10554832649293981783, + 14109148363171599309, + 4153042273623539198, + 250647462785784749, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} diff --git a/ecc/bls12-378/internal/fptower/generators_test.go b/ecc/bls12-378/internal/fptower/generators_test.go new file mode 100644 index 000000000..b735007c2 --- /dev/null +++ 
b/ecc/bls12-378/internal/fptower/generators_test.go @@ -0,0 +1,51 @@ +package fptower + +import ( + "crypto/rand" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/leanovate/gopter" +) + +// Fp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + rand.Read(b[:]) + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// E2 generates an E2 elmt +func GenE2() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + ).Map(func(values []interface{}) *E2 { + return &E2{A0: values[0].(fp.Element), A1: values[1].(fp.Element)} + }) +} + +// E6 generates an E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE2(), + GenE2(), + GenE2(), + ).Map(func(values []interface{}) *E6 { + return &E6{B0: *values[0].(*E2), B1: *values[1].(*E2), B2: *values[2].(*E2)} + }) +} + +// E12 generates an E12 elmt +func GenE12() gopter.Gen { + return gopter.CombineGens( + GenE6(), + GenE6(), + ).Map(func(values []interface{}) *E12 { + return &E12{C0: *values[0].(*E6), C1: *values[1].(*E6)} + }) +} diff --git a/ecc/bls12-378/marshal.go b/ecc/bls12-378/marshal.go new file mode 100644 index 000000000..0411ca061 --- /dev/null +++ b/ecc/bls12-378/marshal.go @@ -0,0 +1,1160 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "encoding/binary" + "errors" + "io" + "reflect" + "sync/atomic" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// To encode G1Affine and G2Affine points, we mask the most significant bits with these bits to specify without ambiguity +// metadata needed for point (de)compression +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. 
+const ( + mMask byte = 0b111 << 5 + mUncompressed byte = 0b000 << 5 + mUncompressedInfinity byte = 0b010 << 5 + mCompressedSmallest byte = 0b100 << 5 + mCompressedLargest byte = 0b101 << 5 + mCompressedInfinity byte = 0b110 << 5 +) + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = fptower.SizeOfGT + +// Encoder writes bls12-378 object values to an output stream +type Encoder struct { + w io.Writer + n int64 // written bytes + raw bool // raw vs compressed encoding +} + +// Decoder reads bls12-378 object values from an inbound stream +type Decoder struct { + r io.Reader + n int64 // read bytes + subGroupCheck bool // default to true +} + +// NewDecoder returns a binary decoder supporting curve bls12-378 objects in both +// compressed and uncompressed (raw) forms +func NewDecoder(r io.Reader, options ...func(*Decoder)) *Decoder { + d := &Decoder{r: r, subGroupCheck: true} + + for _, o := range options { + o(d) + } + + return d +} + +// Decode reads the binary encoding of v from the stream +// type must be *uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, *[]G1Affine or *[]G2Affine +func (dec *Decoder) Decode(v interface{}) (err error) { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() || !rv.Elem().CanSet() { + return errors.New("bls12-378 decoder: unsupported type, need pointer") + } + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // in particular, careful attention must be given to usage of Bytes() method on Elements and Points + // that return an array (not a slice) of bytes. Using this is beneficial to minimize memallocs + // in very large (de)serialization upstream in gnark. 
+ // (but detrimental to code readability here) + // TODO double check memory usage and factorize this + + var buf [SizeOfG2AffineUncompressed]byte + var read int + + switch t := v.(type) { + case *fr.Element: + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + t.SetBytes(buf[:fr.Bytes]) + return + case *fp.Element: + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + t.SetBytes(buf[:fp.Bytes]) + return + case *[]fr.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fr.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fr.Bytes]) + } + return + case *[]fp.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fp.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fp.Bytes]) + } + return + case *G1Affine: + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *G2Affine: + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. 
+ read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *[]G1Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G1Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + case *[]G2Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G2Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New("bls12-378 encoder: unsupported type") + } + err = binary.Read(dec.r, binary.BigEndian, t) + if err == nil { + dec.n += int64(n) + } + return + } +} + +// BytesRead return total bytes read from reader +func (dec *Decoder) BytesRead() int64 { + return dec.n +} + +func (dec *Decoder) readUint32() (r uint32, err error) { + var read int + var buf [4]byte + read, err = io.ReadFull(dec.r, buf[:4]) + dec.n += int64(read) + if err != nil { + return + } + r = binary.BigEndian.Uint32(buf[:4]) + return +} + +func isCompressed(msb byte) bool { + mData := msb & mMask + return !((mData == mUncompressed) || (mData == mUncompressedInfinity)) +} + +// NewEncoder returns a binary encoder supporting curve bls12-378 objects +func NewEncoder(w io.Writer, options ...func(*Encoder)) *Encoder { + // default settings + enc := &Encoder{ + w: w, + n: 0, + raw: false, + } + + // handle options + for _, option := range options { + option(enc) + } + + return enc +} + +// Encode writes the binary encoding of v to the stream +// type must be uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, []G1Affine or []G2Affine +func (enc *Encoder) Encode(v interface{}) (err error) { + if enc.raw { + 
return enc.encodeRaw(v) + } + return enc.encode(v) +} + +// BytesWritten return total bytes written on writer +func (enc *Encoder) BytesWritten() int64 { + return enc.n +} + +// RawEncoding returns an option to use in NewEncoder(...) which sets raw encoding mode to true +// points will not be compressed using this option +func RawEncoding() func(*Encoder) { + return func(enc *Encoder) { + enc.raw = true + } +} + +// NoSubgroupChecks returns an option to use in NewDecoder(...) which disable subgroup checks on the points +// the decoder will read. Use with caution, as crafted points from an untrusted source can lead to crypto-attacks. +func NoSubgroupChecks() func(*Decoder) { + return func(dec *Decoder) { + dec.subGroupCheck = false + } +} + +func (enc *Encoder) encode(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < 
len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +func (enc *Encoder) encodeRaw(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = 
t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +// SizeOfG1AffineCompressed represents the size in bytes that a G1Affine need in binary form, compressed +const SizeOfG1AffineCompressed = 48 + +// SizeOfG1AffineUncompressed represents the size in bytes that a G1Affine need in binary form, uncompressed +const SizeOfG1AffineUncompressed = SizeOfG1AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G1Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an alias to SetBytes() +func (p *G1Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes 
returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G1Affine) Bytes() (res [SizeOfG1AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) +// see Bytes() for a compressed representation +func (p *G1Affine) RawBytes() (res [SizeOfG1AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery 
representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + tmp = p.Y + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short io.ErrShortBuffer is returned +// if buf contains compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function returns an error +// this checks that the resulting point is on the curve and in the correct subgroup +func (p *G1Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G1Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG1AffineCompressed { + return 0, io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG1AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineCompressed, 
nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + p.X.SetBytes(buf[:fp.Bytes]) + p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G1Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return errors.New("invalid compressed coordinate: 
square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is infinity and need no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G1Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + // store mData in p.Y[0] + p.Y[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} + +// SizeOfG2AffineCompressed represents the size in bytes that a G2Affine need in binary form, compressed +const SizeOfG2AffineCompressed = 48 * 2 + +// SizeOfG2AffineUncompressed represents the size in bytes that a G2Affine need in binary form, uncompressed +const SizeOfG2AffineUncompressed = SizeOfG2AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G2Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an alias to SetBytes() +func (p *G2Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the 
BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G2Affine) Bytes() (res [SizeOfG2AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + // p.X.A1 | p.X.A0 + tmp = p.X.A0 + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + + tmp = p.X.A1 + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) +// see Bytes() for a compressed representation +func (p 
*G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + // p.Y.A1 | p.Y.A0 + tmp = p.Y.A0 + tmp.FromMont() + binary.BigEndian.PutUint64(res[184:192], tmp[0]) + binary.BigEndian.PutUint64(res[176:184], tmp[1]) + binary.BigEndian.PutUint64(res[168:176], tmp[2]) + binary.BigEndian.PutUint64(res[160:168], tmp[3]) + binary.BigEndian.PutUint64(res[152:160], tmp[4]) + binary.BigEndian.PutUint64(res[144:152], tmp[5]) + + tmp = p.Y.A1 + tmp.FromMont() + binary.BigEndian.PutUint64(res[136:144], tmp[0]) + binary.BigEndian.PutUint64(res[128:136], tmp[1]) + binary.BigEndian.PutUint64(res[120:128], tmp[2]) + binary.BigEndian.PutUint64(res[112:120], tmp[3]) + binary.BigEndian.PutUint64(res[104:112], tmp[4]) + binary.BigEndian.PutUint64(res[96:104], tmp[5]) + + // we store X and mask the most significant word with our metadata mask + // p.X.A1 | p.X.A0 + tmp = p.X.A1 + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + tmp = p.X.A0 + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short 
io.ErrShortBuffer is returned +// if buf contains compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function returns an error +// this checks that the resulting point is on the curve and in the correct subgroup +func (p *G2Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G2Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG2AffineCompressed { + return 0, io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG2AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineCompressed, nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + // p.X.A1 | p.X.A0 + p.X.A1.SetBytes(buf[:fp.Bytes]) + p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + // p.Y.A1 | p.Y.A0 + p.Y.A1.SetBytes(buf[fp.Bytes*2 : fp.Bytes*3]) + p.Y.A0.SetBytes(buf[fp.Bytes*3 : fp.Bytes*4]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. 
we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + // p.X.A1 | p.X.A0 + p.X.A1.SetBytes(bufX[:fp.Bytes]) + p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + var YSquared, Y fptower.E2 + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if YSquared.Legendre() == -1 { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + Y.Sqrt(&YSquared) + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G2Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y.A0[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fptower.E2 + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if YSquared.Legendre() == -1 { + return errors.New("invalid compressed coordinate: square root doesn't exist") + } + Y.Sqrt(&YSquared) + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of 
compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is infinity and need no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G2Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + // p.X.A1 | p.X.A0 + p.X.A1.SetBytes(bufX[:fp.Bytes]) + p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // store mData in p.Y.A0[0] + p.Y.A0[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} diff --git a/ecc/bls12-378/marshal_test.go b/ecc/bls12-378/marshal_test.go new file mode 100644 index 000000000..3fc6a5f12 --- /dev/null +++ b/ecc/bls12-378/marshal_test.go @@ -0,0 +1,467 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "bytes" + "io" + "math/big" + "math/rand" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +func TestEncoder(t *testing.T) { + + // TODO need proper fuzz testing here + + var inA uint64 + var inB fr.Element + var inC fp.Element + var inD G1Affine + var inE G1Affine + var inF G2Affine + var inG []G1Affine + var inH []G2Affine + var inI []fp.Element + var inJ []fr.Element + + // set values of inputs + inA = rand.Uint64() + inB.SetRandom() + inC.SetRandom() + inD.ScalarMultiplication(&g1GenAff, new(big.Int).SetUint64(rand.Uint64())) + // inE --> infinity + inF.ScalarMultiplication(&g2GenAff, new(big.Int).SetUint64(rand.Uint64())) + inG = make([]G1Affine, 2) + inH = make([]G2Affine, 0) + inG[1] = inD + inI = make([]fp.Element, 3) + inI[2] = inD.X + inJ = make([]fr.Element, 0) + + // encode them, compressed and raw + var buf, bufRaw bytes.Buffer + enc := NewEncoder(&buf) + encRaw := NewEncoder(&bufRaw, RawEncoding()) + toEncode := []interface{}{inA, &inB, &inC, &inD, &inE, &inF, inG, inH, inI, inJ} + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + t.Fatal(err) + } + if err := encRaw.Encode(v); err != nil { + t.Fatal(err) + } + } + + testDecode := func(t *testing.T, r io.Reader, n int64) { + dec := NewDecoder(r) + var outA uint64 + var outB fr.Element + var outC fp.Element + var outD G1Affine + var outE G1Affine + outE.X.SetOne() + outE.Y.SetUint64(42) + var outF G2Affine + var outG []G1Affine + var outH []G2Affine + var outI []fp.Element + var outJ []fr.Element + + toDecode := []interface{}{&outA, &outB, &outC, &outD, &outE, &outF, &outG, &outH, &outI, &outJ} + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + 
t.Fatal(err) + } + } + + // compare values + if inA != outA { + t.Fatal("didn't encode/decode uint64 value properly") + } + + if !inB.Equal(&outB) || !inC.Equal(&outC) { + t.Fatal("decode(encode(Element) failed") + } + if !inD.Equal(&outD) || !inE.Equal(&outE) { + t.Fatal("decode(encode(G1Affine) failed") + } + if !inF.Equal(&outF) { + t.Fatal("decode(encode(G2Affine) failed") + } + if (len(inG) != len(outG)) || (len(inH) != len(outH)) { + t.Fatal("decode(encode(slice(points))) failed") + } + for i := 0; i < len(inG); i++ { + if !inG[i].Equal(&outG[i]) { + t.Fatal("decode(encode(slice(points))) failed") + } + } + if (len(inI) != len(outI)) || (len(inJ) != len(outJ)) { + t.Fatal("decode(encode(slice(elements))) failed") + } + for i := 0; i < len(inI); i++ { + if !inI[i].Equal(&outI[i]) { + t.Fatal("decode(encode(slice(elements))) failed") + } + } + if n != dec.BytesRead() { + t.Fatal("bytes read don't match bytes written") + } + } + + // decode them + testDecode(t, &buf, enc.BytesWritten()) + testDecode(t, &bufRaw, encRaw.BytesWritten()) + +} + +func TestIsCompressed(t *testing.T) { + var g1Inf, g1 G1Affine + var g2Inf, g2 G2Affine + + g1 = g1GenAff + g2 = g2GenAff + + { + b := g1Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1Inf.Bytes() should be compressed") + } + } + + { + b := g1Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1Inf.RawBytes() should be uncompressed") + } + } + + { + b := g1.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1.Bytes() should be compressed") + } + } + + { + b := g1.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1.RawBytes() should be uncompressed") + } + } + + { + b := g2Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2Inf.Bytes() should be compressed") + } + } + + { + b := g2Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g2Inf.RawBytes() should be uncompressed") + } + } + + { + b := g2.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2.Bytes() should be compressed") + } + } + + { + b := g2.RawBytes() + if 
isCompressed(b[0]) { + t.Fatal("g2.RawBytes() should be uncompressed") + } + } + +} + +func TestG1AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG1AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G1] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != 
SizeOfG1AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G2] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + 
+ buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// define Gopters generators + +// GenFr generates an Fr element +func GenFr() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fr.Element + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenFp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenE2 generates an fptower.E2 elmt +func GenE2() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + ).Map(func(values []interface{}) fptower.E2 { + return fptower.E2{A0: values[0].(fp.Element), A1: values[1].(fp.Element)} + }) +} + +// GenE6 generates an fptower.E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE2(), + GenE2(), + GenE2(), + ).Map(func(values []interface{}) fptower.E6 { + return fptower.E6{B0: values[0].(fptower.E2), B1: values[1].(fptower.E2), B2: values[2].(fptower.E2)} + }) +} + +// GenE12 generates an fptower.E6 elmt +func GenE12() gopter.Gen { + return gopter.CombineGens( + GenE6(), + GenE6(), + ).Map(func(values []interface{}) fptower.E12 { + return fptower.E12{C0: values[0].(fptower.E6), C1: values[1].(fptower.E6)} + }) +} + +// GenBigInt generates a big.Int +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fp.Bytes]byte + _, err := 
rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} diff --git a/ecc/bls12-378/multiexp.go b/ecc/bls12-378/multiexp.go new file mode 100644 index 000000000..e9203d0d0 --- /dev/null +++ b/ecc/bls12-378/multiexp.go @@ -0,0 +1,2303 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "errors" + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/internal/parallel" + "math" + "runtime" +) + +// selector stores the index, mask and shifts needed to select bits from a scalar +// it is used during the multiExp algorithm or the batch scalar multiplication +type selector struct { + index uint64 // index in the multi-word scalar to select bits from + mask uint64 // mask (c-bit wide) + shift uint64 // shift needed to get our bits on low positions + + multiWordSelect bool // set to true if we need to select bits from 2 words (case where c doesn't divide 64) + maskHigh uint64 // same than mask, for index+1 + shiftHigh uint64 // same than shift, for index+1 +} + +// partitionScalars compute, for each scalars over c-bit wide windows, nbChunk digits +// if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract +// 2^{c} to the 
current digit, making it negative. +// negative digits can be processed in a later step as adding -G into the bucket instead of G +// (computing -G is cheap, and this saves us half of the buckets in the MultiExp or BatchScalarMul) +// scalarsMont indicates wheter the provided scalars are in montgomery form +// returns smallValues, which represent the number of scalars which meets the following condition +// 0 < scalar < 2^c (in other words, scalars where only the c-least significant bits are non zero) +func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks int) ([]fr.Element, int) { + toReturn := make([]fr.Element, len(scalars)) + + // number of c-bit radixes in a scalar + nbChunks := fr.Limbs * 64 / c + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) // msb of the c-bit window + max := int(1 << (c - 1)) // max value we want for our digits + cDivides64 := (64 % c) == 0 // if c doesn't divide 64, we may need to select over multiple words + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := uint64(0); chunk < nbChunks; chunk++ { + jc := uint64(chunk * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = !cDivides64 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + + // for each chunk, we could track the number of non-zeros points we will need to process + // this way, if a chunk has more work to do than others, we can spawn off more go routines + // (at the cost of more buckets allocated) + // a simplified approach is to track the small values where only the first word is set + // if this number represent a significant number of points, then 
we will split first chunk + // processing in the msm in 2, to ensure all go routines finish at ~same time + // /!\ nbTasks is enough as parallel.Execute is not going to spawn more than nbTasks go routine + // if it does, though, this will deadlocK. + chSmallValues := make(chan int, nbTasks) + + parallel.Execute(len(scalars), func(start, end int) { + smallValues := 0 + for i := start; i < end; i++ { + var carry int + + scalar := scalars[i] + if scalarsMont { + scalar.FromMont() + } + if scalar.IsUint64() { + // everything is 0, no need to process this scalar + if scalar[0] == 0 { + continue + } + // low c-bits are 1 in mask + if scalar[0]&mask == scalar[0] { + smallValues++ + } + } + + // for each chunk in the scalar, compute the current digit, and an eventual carry + for chunk := uint64(0); chunk < nbChunks; chunk++ { + s := selectors[chunk] + + // init with carry if any + digit := carry + carry = 0 + + // digit = value of the c-bit window + digit += int((scalar[s.index] & s.mask) >> s.shift) + + if s.multiWordSelect { + // we are selecting bits over 2 words + digit += int(scalar[s.index+1]&s.maskHigh) << s.shiftHigh + } + + // if digit is zero, no impact on result + if digit == 0 { + continue + } + + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. 
+ if digit >= max { + digit -= (1 << c) + carry = 1 + } + + var bits uint64 + if digit >= 0 { + bits = uint64(digit) + } else { + bits = uint64(-digit-1) | msbWindow + } + + toReturn[i][s.index] |= (bits << s.shift) + if s.multiWordSelect { + toReturn[i][s.index+1] |= (bits >> s.shiftHigh) + } + + } + } + + chSmallValues <- smallValues + + }, nbTasks) + + // aggregate small values + close(chSmallValues) + smallValues := 0 + for o := range chSmallValues { + smallValues += o + } + return toReturn, smallValues +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Affine) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Affine, error) { + var _p G1Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Jac, error) { + // note: + // each of the msmCX method is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoritical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point don't. + + // for each msmCX + // step 1 + // we compute, for each scalars over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. 
+ // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets base on their selector and return the weighted bucket sum in given channel + // step 3 + // reduce the buckets weigthed sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. 
+ // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG1Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G1Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG1Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG1Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG1Jac(p *G1Jac, c int, points []G1Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 6: + p.msmC6(points, scalars, splitFirstChunk) + + case 7: + p.msmC7(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 9: + p.msmC9(points, scalars, splitFirstChunk) + + case 10: + p.msmC10(points, scalars, splitFirstChunk) + + case 11: + p.msmC11(points, scalars, splitFirstChunk) + + case 12: + p.msmC12(points, scalars, splitFirstChunk) + + case 13: + p.msmC13(points, scalars, splitFirstChunk) + + case 14: + p.msmC14(points, scalars, splitFirstChunk) + + case 15: + p.msmC15(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + case 20: + p.msmC20(points, scalars, splitFirstChunk) + + case 21: + p.msmC21(points, scalars, splitFirstChunk) + + case 22: + p.msmC22(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG1Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG1Affine(p *G1Jac, c int, chChunks []chan g1JacExtended) *G1Jac { + var _p g1JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := 
<-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG1Affine(chunk uint64, + chRes chan<- g1JacExtended, + buckets []g1JacExtended, + c uint64, + points []G1Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... 
+ n*bucket[n-1] + + var runningSum, total g1JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G1Jac) msmC4(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC5(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop 
through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC6(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 6 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // 
each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC7(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 7 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + 
} + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC8(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, 
buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC9(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 9 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, 
points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC10(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 10 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go 
processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC11(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 11 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return 
msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC12(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 12 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC13(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 13 // scalars partitioned 
into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC14(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 14 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // 
corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC15(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 15 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result 
in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC16(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, 
points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC20(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 20 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, 
scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC21(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 21 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, 
chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC22(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 22 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, 
points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Affine) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Affine, error) { + var _p G2Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Jac, error) { + // note: + // each of the msmCX methods is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoretical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point doesn't. + + // for each msmCX + // step 1 + // we compute, for each scalar over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and subtract + // 2^{c} from the current digit, making it negative. 
+ // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets based on their selector and returns the weighted bucket sum in given channel + // step 3 + // reduce the buckets weighted sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 22} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. 
+ // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG2Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G2Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG2Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG2Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG2Jac(p *G2Jac, c int, points []G2Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 6: + p.msmC6(points, scalars, splitFirstChunk) + + case 7: + p.msmC7(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 9: + p.msmC9(points, scalars, splitFirstChunk) + + case 10: + p.msmC10(points, scalars, splitFirstChunk) + + case 11: + p.msmC11(points, scalars, splitFirstChunk) + + case 12: + p.msmC12(points, scalars, splitFirstChunk) + + case 13: + p.msmC13(points, scalars, splitFirstChunk) + + case 14: + p.msmC14(points, scalars, splitFirstChunk) + + case 15: + p.msmC15(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + case 20: + p.msmC20(points, scalars, splitFirstChunk) + + case 21: + p.msmC21(points, scalars, splitFirstChunk) + + case 22: + p.msmC22(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG2Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG2Affine(p *G2Jac, c int, chChunks []chan g2JacExtended) *G2Jac { + var _p g2JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := 
<-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG2Affine(chunk uint64, + chRes chan<- g2JacExtended, + buckets []g2JacExtended, + c uint64, + points []G2Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalar, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to subtract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... 
+ n*bucket[n-1] + + var runningSum, total g2JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G2Jac) msmC4(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC5(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop 
through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC6(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 6 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // 
each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC7(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 7 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + 
} + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC8(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, 
buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC9(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 9 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, 
points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC10(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 10 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go 
processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC11(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 11 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return 
msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC12(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 12 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC13(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 13 // scalars partitioned 
into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC14(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 14 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // 
corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC15(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 15 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result 
in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC16(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, 
points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC20(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 20 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, 
scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC21(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 21 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, 
chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC22(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 22 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, 
points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} diff --git a/ecc/bls12-378/multiexp_test.go b/ecc/bls12-378/multiexp_test.go new file mode 100644 index 000000000..2ea2502d3 --- /dev/null +++ b/ecc/bls12-378/multiexp_test.go @@ -0,0 +1,1349 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "math/bits" + "runtime" + "sync" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestMultiExpG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G1Affine + var g G1Jac + g.Set(&g1Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same result as a non-splitted one.. + properties.Property("[G1] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G1Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G1] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G1Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G1Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G1] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=6) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 6, false, runtime.NumCPU()) + result.msmC6(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=7) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 7, false, runtime.NumCPU()) + result.msmC7(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=9) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 9, false, runtime.NumCPU()) + result.msmC9(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=10) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 10, false, runtime.NumCPU()) + result.msmC10(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=11) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 11, false, runtime.NumCPU()) + result.msmC11(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=12) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 12, false, runtime.NumCPU()) + result.msmC12(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=13) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 13, false, runtime.NumCPU()) + result.msmC13(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=14) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 14, false, runtime.NumCPU()) + result.msmC14(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=15) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 15, false, runtime.NumCPU()) + result.msmC15(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=20) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 20, false, runtime.NumCPU()) + result.msmC20(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=21) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 21, false, runtime.NumCPU()) + result.msmC21(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=22) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 22, false, runtime.NumCPU()) + result.msmC22(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G1] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G1Jac + g.Set(&g1Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G1Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + var op1MultiExp G1Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G1Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g1GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG1(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var t1, t2, t3 G1Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} + +func TestMultiExpG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G2Affine + var g G2Jac + g.Set(&g2Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same 
result as a non-splitted one.. + properties.Property("[G2] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G2Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G2] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G2Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G2Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G2] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=6) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 6, false, runtime.NumCPU()) + result.msmC6(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=7) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 7, false, runtime.NumCPU()) + result.msmC7(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=9) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 9, false, runtime.NumCPU()) + result.msmC9(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=10) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 10, false, runtime.NumCPU()) + result.msmC10(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=11) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 11, false, runtime.NumCPU()) + result.msmC11(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=12) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 12, false, runtime.NumCPU()) + result.msmC12(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=13) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 13, false, runtime.NumCPU()) + result.msmC13(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=14) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 14, false, runtime.NumCPU()) + result.msmC14(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=15) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 15, false, runtime.NumCPU()) + result.msmC15(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=20) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 20, false, runtime.NumCPU()) + result.msmC20(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=21) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 21, false, runtime.NumCPU()) + result.msmC21(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=22) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 22, false, runtime.NumCPU()) + result.msmC22(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G2] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G2Jac + g.Set(&g2Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G2Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + var op1MultiExp G2Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G2Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g2GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG2(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + 
for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g2GenAff + } + + var t1, t2, t3 G2Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} diff --git a/ecc/bls12-378/pairing.go b/ecc/bls12-378/pairing.go new file mode 100644 index 000000000..08ed3775e --- /dev/null +++ b/ecc/bls12-378/pairing.go @@ -0,0 +1,241 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bls12378 + +import ( + "errors" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +// GT target group of the pairing +type GT = fptower.E12 + +type lineEvaluation struct { + r0 fptower.E2 + r1 fptower.E2 + r2 fptower.E2 +} + +// Pair calculates the reduced pairing for a set of points +func Pair(P []G1Affine, Q []G2Affine) (GT, error) { + f, err := MillerLoop(P, Q) + if err != nil { + return GT{}, err + } + return FinalExponentiation(&f), nil +} + +// PairingCheck calculates the reduced pairing for a set of points and returns True if the result is One +func PairingCheck(P []G1Affine, Q []G2Affine) (bool, error) { + f, err := Pair(P, Q) + if err != nil { + return false, err + } + var one GT + one.SetOne() + return f.Equal(&one), nil +} + +// FinalExponentiation computes the final expo x**(p**6-1)(p**2+1)(p**4 - p**2 +1)/r +func FinalExponentiation(z *GT, _z ...*GT) GT { + var result GT + result.Set(z) + + for _, e := range _z { + result.Mul(&result, e) + } + + // https://eprint.iacr.org/2016/130.pdf + var t [3]GT + + // easy part + t[0].Conjugate(&result) + result.Inverse(&result) + t[0].Mul(&t[0], &result) + result.FrobeniusSquare(&t[0]). 
+ Mul(&result, &t[0]) + + // hard part (up to permutation) + // Daiki Hayashida and Kenichiro Hayasaka + // and Tadanori Teruya + // https://eprint.iacr.org/2020/875.pdf + t[0].CyclotomicSquare(&result) + t[1].Expt(&result) + t[2].InverseUnitary(&result) + t[1].Mul(&t[1], &t[2]) + t[2].Expt(&t[1]) + t[1].InverseUnitary(&t[1]) + t[1].Mul(&t[1], &t[2]) + t[2].Expt(&t[1]) + t[1].Frobenius(&t[1]) + t[1].Mul(&t[1], &t[2]) + result.Mul(&result, &t[0]) + t[0].Expt(&t[1]) + t[2].Expt(&t[0]) + t[0].FrobeniusSquare(&t[1]) + t[1].InverseUnitary(&t[1]) + t[1].Mul(&t[1], &t[2]) + t[1].Mul(&t[1], &t[0]) + result.Mul(&result, &t[1]) + + return result +} + +// MillerLoop Miller loop +func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { + // check input size match + n := len(P) + if n == 0 || n != len(Q) { + return GT{}, errors.New("invalid inputs sizes") + } + + // filter infinity points + p := make([]G1Affine, 0, n) + q := make([]G2Affine, 0, n) + + for k := 0; k < n; k++ { + if P[k].IsInfinity() || Q[k].IsInfinity() { + continue + } + p = append(p, P[k]) + q = append(q, Q[k]) + } + + n = len(p) + + // projective points for Q + qProj := make([]g2Proj, n) + for k := 0; k < n; k++ { + qProj[k].FromAffine(&q[k]) + } + + var result, lines GT + result.SetOne() + + var l1, l2 lineEvaluation + + // i == 62 + for k := 0; k < n; k++ { + qProj[k].DoubleStep(&l1) + // line eval + l1.r1.MulByElement(&l1.r1, &p[k].X) + l1.r2.MulByElement(&l1.r2, &p[k].Y) + result.MulBy014(&l1.r0, &l1.r1, &l1.r2) + } + + for i := 61; i >= 0; i-- { + result.Square(&result) + + for k := 0; k < n; k++ { + qProj[k].DoubleStep(&l1) + // line eval + l1.r1.MulByElement(&l1.r1, &p[k].X) + l1.r2.MulByElement(&l1.r2, &p[k].Y) + + if loopCounter[i] == 0 { + result.MulBy014(&l1.r0, &l1.r1, &l1.r2) + } else { + qProj[k].AddMixedStep(&l2, &q[k]) + // line eval + l2.r1.MulByElement(&l2.r1, &p[k].X) + l2.r2.MulByElement(&l2.r2, &p[k].Y) + lines.Mul014By014(&l1.r0, &l1.r1, &l1.r2, &l2.r0, &l2.r1, &l2.r2) + 
result.Mul(&result, &lines) + } + } + } + + return result, nil +} + +// DoubleStep doubles a point in Homogenous projective coordinates, and evaluates the line in Miller loop +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g2Proj) DoubleStep(l *lineEvaluation) { + + // get some Element from our pool + var t1, A, B, C, D, E, EE, F, G, H, I, J, K fptower.E2 + A.Mul(&p.x, &p.y) + A.Halve() + B.Square(&p.y) + C.Square(&p.z) + D.Double(&C). + Add(&D, &C) + E.Mul(&D, &bTwistCurveCoeff) + F.Double(&E). + Add(&F, &E) + G.Add(&B, &F) + G.Halve() + H.Add(&p.y, &p.z). + Square(&H) + t1.Add(&B, &C) + H.Sub(&H, &t1) + I.Sub(&E, &B) + J.Square(&p.x) + EE.Square(&E) + K.Double(&EE). + Add(&K, &EE) + + // X, Y, Z + p.x.Sub(&B, &F). + Mul(&p.x, &A) + p.y.Square(&G). + Sub(&p.y, &K) + p.z.Mul(&B, &H) + + // Line evaluation + l.r0.Set(&I) + l.r1.Double(&J). + Add(&l.r1, &J) + l.r2.Neg(&H) + +} + +// AddMixedStep point addition in Mixed Homogenous projective and Affine coordinates +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g2Proj) AddMixedStep(l *lineEvaluation, a *G2Affine) { + + // get some Element from our pool + var Y2Z1, X2Z1, O, L, C, D, E, F, G, H, t0, t1, t2, J fptower.E2 + Y2Z1.Mul(&a.Y, &p.z) + O.Sub(&p.y, &Y2Z1) + X2Z1.Mul(&a.X, &p.z) + L.Sub(&p.x, &X2Z1) + C.Square(&O) + D.Square(&L) + E.Mul(&L, &D) + F.Mul(&p.z, &C) + G.Mul(&p.x, &D) + t0.Double(&G) + H.Add(&E, &F). + Sub(&H, &t0) + t1.Mul(&p.y, &E) + + // X, Y, Z + p.x.Mul(&L, &H) + p.y.Sub(&G, &H). + Mul(&p.y, &O). + Sub(&p.y, &t1) + p.z.Mul(&E, &p.z) + + t2.Mul(&L, &a.Y) + J.Mul(&a.X, &O). + Sub(&J, &t2) + + // Line evaluation + l.r0.Set(&J) + l.r1.Neg(&O) + l.r2.Set(&L) +} diff --git a/ecc/bls12-378/pairing_test.go b/ecc/bls12-378/pairing_test.go new file mode 100644 index 000000000..5dd1db441 --- /dev/null +++ b/ecc/bls12-378/pairing_test.go @@ -0,0 +1,305 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestPairing(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE12() + genR1 := GenFr() + genR2 := GenFr() + + properties.Property("[BLS12-378] Having the receiver as operand (final expo) should output the same result", prop.ForAll( + func(a GT) bool { + b := a + b = FinalExponentiation(&a) + a = FinalExponentiation(&a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Exponentiating FinalExpo(a) to r should output 1", prop.ForAll( + func(a GT) bool { + b := FinalExponentiation(&a) + return !a.IsInSubGroup() && b.IsInSubGroup() + }, + genA, + )) + + properties.Property("[BLS12-378] Expt(Expt) and Exp(t^2) should output the same result in the cyclotomic subgroup", prop.ForAll( + func(a GT) bool { + var b, c, d GT + b.Conjugate(&a) + a.Inverse(&a) + b.Mul(&b, &a) + + a.FrobeniusSquare(&b). 
+ Mul(&a, &b) + + c.Expt(&a).Expt(&c) + d.Exp(&a, xGen).Exp(&d, xGen) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BLS12-378] bilinearity", prop.ForAll( + func(a, b fr.Element) bool { + + var res, resa, resb, resab, zero GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint, ab big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + ab.Mul(&abigint, &bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + res, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + resa, _ = Pair([]G1Affine{ag1}, []G2Affine{g2GenAff}) + resb, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{bg2}) + + resab.Exp(&res, ab) + resa.Exp(&resa, bbigint) + resb.Exp(&resb, abigint) + + return resab.Equal(&resa) && resab.Equal(&resb) && !res.Equal(&zero) + + }, + genR1, + genR2, + )) + + properties.Property("[BLS12-378] MillerLoop of pairs should be equal to the product of MillerLoops", prop.ForAll( + func(a, b fr.Element) bool { + + var simpleProd, factorizedProd GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + P0 := []G1Affine{g1GenAff} + P1 := []G1Affine{ag1} + Q0 := []G2Affine{g2GenAff} + Q1 := []G2Affine{bg2} + + // FE( ML(a,b) * ML(c,d) * ML(e,f) * ML(g,h) ) + M1, _ := MillerLoop(P0, Q0) + M2, _ := MillerLoop(P1, Q0) + M3, _ := MillerLoop(P0, Q1) + M4, _ := MillerLoop(P1, Q1) + simpleProd.Mul(&M1, &M2).Mul(&simpleProd, &M3).Mul(&simpleProd, &M4) + simpleProd = FinalExponentiation(&simpleProd) + + tabP := []G1Affine{g1GenAff, ag1, g1GenAff, ag1} + tabQ := []G2Affine{g2GenAff, g2GenAff, bg2, bg2} + + // FE( ML([a,c,e,g] ; [b,d,f,h]) ) -> saves 3 squares in Fqk + factorizedProd, _ = Pair(tabP, tabQ) + + return simpleProd.Equal(&factorizedProd) + }, + genR1, + genR2, + )) + + 
properties.Property("[BLS12-378] PairingCheck", prop.ForAll( + func(a, b fr.Element) bool { + + var g1GenAffNeg G1Affine + g1GenAffNeg.Neg(&g1GenAff) + tabP := []G1Affine{g1GenAff, g1GenAffNeg} + tabQ := []G2Affine{g2GenAff, g2GenAff} + + res, _ := PairingCheck(tabP, tabQ) + + return res + }, + genR1, + genR2, + )) + + properties.Property("[BLS12-378] MillerLoop should skip pairs with a point at infinity", prop.ForAll( + func(a, b fr.Element) bool { + + var one GT + + var ag1, g1Inf G1Affine + var bg2, g2Inf G2Affine + + var abigint, bbigint big.Int + + one.SetOne() + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + g1Inf.FromJacobian(&g1Infinity) + g2Inf.FromJacobian(&g2Infinity) + + // e([0,c] ; [b,d]) + tabP := []G1Affine{g1Inf, ag1} + tabQ := []G2Affine{g2GenAff, bg2} + res1, _ := Pair(tabP, tabQ) + + // e([a,c] ; [0,d]) + tabP = []G1Affine{g1GenAff, ag1} + tabQ = []G2Affine{g2Inf, bg2} + res2, _ := Pair(tabP, tabQ) + + // e([0,c] ; [d,0]) + tabP = []G1Affine{g1Inf, ag1} + tabQ = []G2Affine{bg2, g2Inf} + res3, _ := Pair(tabP, tabQ) + + return res1.Equal(&res2) && !res2.Equal(&res3) && res3.Equal(&one) + }, + genR1, + genR2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkPairing(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkMillerLoop(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkFinalExponentiation(b *testing.B) { + + 
var a GT + a.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + FinalExponentiation(&a) + } + +} + +func BenchmarkMultiMiller(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop(P, Q) + } + }) + } +} + +func BenchmarkMultiPair(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair(P, Q) + } + }) + } +} diff --git a/ecc/bls12-378/twistededwards/doc.go b/ecc/bls12-378/twistededwards/doc.go new file mode 100644 index 000000000..584dc49da --- /dev/null +++ b/ecc/bls12-378/twistededwards/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package twistededwards provides bls12-378's twisted edwards "companion curve" defined on fr. +package twistededwards diff --git a/ecc/bls12-378/twistededwards/eddsa/doc.go b/ecc/bls12-378/twistededwards/eddsa/doc.go new file mode 100644 index 000000000..e19c483f7 --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/doc.go @@ -0,0 +1,22 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package eddsa provides EdDSA signature scheme on bls12-378's twisted edwards curve. +// +// See also +// +// https://en.wikipedia.org/wiki/EdDSA +package eddsa diff --git a/ecc/bls12-378/twistededwards/eddsa/eddsa.go b/ecc/bls12-378/twistededwards/eddsa/eddsa.go new file mode 100644 index 000000000..00f78b442 --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/eddsa.go @@ -0,0 +1,265 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "errors" + "hash" + "io" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/twistededwards" + "github.com/consensys/gnark-crypto/signature" + "golang.org/x/crypto/blake2b" +) + +var errNotOnCurve = errors.New("point not on curve") + +const ( + sizeFr = fr.Bytes + sizePublicKey = sizeFr + sizeSignature = 2 * sizeFr + sizePrivateKey = 2*sizeFr + 32 +) + +// PublicKey eddsa signature object +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type PublicKey struct { + A twistededwards.PointAffine +} + +// PrivateKey private key of an eddsa instance +type PrivateKey struct { + PublicKey PublicKey // copy of the associated public key + scalar [sizeFr]byte // secret scalar, in big Endian + randSrc [32]byte // source +} + +// Signature represents an eddsa signature +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type Signature struct { + R twistededwards.PointAffine + S [sizeFr]byte +} + +func init() { + signature.Register(signature.EDDSA_BLS12_378, GenerateKeyInterfaces) +} + +// GenerateKey generates a public and private key pair. 
+func GenerateKey(r io.Reader) (PrivateKey, error) { + + c := twistededwards.GetEdwardsCurve() + + var pub PublicKey + var priv PrivateKey + + // hash(h) = private_key || random_source, on 32 bytes each + seed := make([]byte, 32) + _, err := r.Read(seed) + if err != nil { + return priv, err + } + h := blake2b.Sum512(seed[:]) + for i := 0; i < 32; i++ { + priv.randSrc[i] = h[i+32] + } + + // prune the key + // https://tools.ietf.org/html/rfc8032#section-5.1.5, key generation + + h[0] &= 0xF8 + h[31] &= 0x7F + h[31] |= 0x40 + + // reverse first bytes because setBytes interpret stream as big endian + // but in eddsa specs s is the first 32 bytes in little endian + for i, j := 0, sizeFr; i < j; i, j = i+1, j-1 { + + h[i], h[j] = h[j], h[i] + + } + + copy(priv.scalar[:], h[:sizeFr]) + + var bscalar big.Int + bscalar.SetBytes(priv.scalar[:]) + pub.A.ScalarMul(&c.Base, &bscalar) + + priv.PublicKey = pub + + return priv, nil +} + +// GenerateKeyInterfaces generate interfaces for the public/private key. +// This purpose of this function is to be registered in the list of signature schemes. +func GenerateKeyInterfaces(r io.Reader) (signature.Signer, error) { + priv, err := GenerateKey(r) + return &priv, err +} + +// Equal compares 2 public keys +func (pub *PublicKey) Equal(other signature.PublicKey) bool { + bpk := pub.Bytes() + bother := other.Bytes() + return subtle.ConstantTimeCompare(bpk, bother) == 1 +} + +// Public returns the public key associated to the private key. +// From Signer interface defined in gnark/crypto/signature. 
+func (privKey *PrivateKey) Public() signature.PublicKey { + var pub PublicKey + pub.A.Set(&privKey.PublicKey.A) + return &pub +} + +// Sign sign a message +// Pure Eddsa version (see https://tools.ietf.org/html/rfc8032#page-8) +func (privKey *PrivateKey) Sign(message []byte, hFunc hash.Hash) ([]byte, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + var res Signature + + // blinding factor for the private key + // blindingFactorBigInt must be the same size as the private key, + // blindingFactorBigInt = h(randomness_source||message)[:sizeFr] + var blindingFactorBigInt big.Int + + // randSrc = privKey.randSrc || msg (-> message = MSB message .. LSB message) + randSrc := make([]byte, 32+len(message)) + for i, v := range privKey.randSrc { + randSrc[i] = v + } + copy(randSrc[32:], message) + + // randBytes = H(randSrc) + blindingFactorBytes := blake2b.Sum512(randSrc[:]) // TODO ensures that the hash used to build the key and the one used here is the same + blindingFactorBigInt.SetBytes(blindingFactorBytes[:sizeFr]) + + // compute R = randScalar*Base + res.R.ScalarMul(&curveParams.Base, &blindingFactorBigInt) + if !res.R.IsOnCurve() { + return nil, errNotOnCurve + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + resRX := res.R.X.Bytes() + resRY := res.R.Y.Bytes() + resAX := privKey.PublicKey.A.X.Bytes() + resAY := privKey.PublicKey.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], resRX[:]) + copy(dataToHash[sizeFr:], resRY[:]) + copy(dataToHash[2*sizeFr:], resAX[:]) + copy(dataToHash[3*sizeFr:], resAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + _, err := hFunc.Write(dataToHash[:]) + if err != nil { + return nil, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // Compute s = randScalarInt + H(R,A,M)*S + // going with big int to do ops mod curve order + var bscalar, bs big.Int + 
bscalar.SetBytes(privKey.scalar[:]) + bs.Mul(&hramInt, &bscalar). + Add(&bs, &blindingFactorBigInt). + Mod(&bs, &curveParams.Order) + sb := bs.Bytes() + if len(sb) < sizeFr { + offset := make([]byte, sizeFr-len(sb)) + sb = append(offset, sb...) + } + copy(res.S[:], sb[:]) + + return res.Bytes(), nil +} + +// Verify verifies an eddsa signature +func (pub *PublicKey) Verify(sigBin, message []byte, hFunc hash.Hash) (bool, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + // verify that pubKey and R are on the curve + if !pub.A.IsOnCurve() { + return false, errNotOnCurve + } + + // Deserialize the signature + var sig Signature + if _, err := sig.SetBytes(sigBin); err != nil { + return false, err + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + sigRX := sig.R.X.Bytes() + sigRY := sig.R.Y.Bytes() + sigAX := pub.A.X.Bytes() + sigAY := pub.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], sigRX[:]) + copy(dataToHash[sizeFr:], sigRY[:]) + copy(dataToHash[2*sizeFr:], sigAX[:]) + copy(dataToHash[3*sizeFr:], sigAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + if _, err := hFunc.Write(dataToHash[:]); err != nil { + return false, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // lhs = cofactor*S*Base + var lhs twistededwards.PointAffine + var bCofactor, bs big.Int + curveParams.Cofactor.ToBigInt(&bCofactor) + bs.SetBytes(sig.S[:]) + lhs.ScalarMul(&curveParams.Base, &bs). + ScalarMul(&lhs, &bCofactor) + + if !lhs.IsOnCurve() { + return false, errNotOnCurve + } + + // rhs = cofactor*(R + H(R,A,M)*A) + var rhs twistededwards.PointAffine + rhs.ScalarMul(&pub.A, &hramInt). + Add(&rhs, &sig.R). 
+ ScalarMul(&rhs, &bCofactor) + if !rhs.IsOnCurve() { + return false, errNotOnCurve + } + + // verifies that cofactor*S*Base=cofactor*(R + H(R,A,M)*A) + if !lhs.X.Equal(&rhs.X) || !lhs.Y.Equal(&rhs.Y) { + return false, nil + } + + return true, nil +} diff --git a/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go b/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go new file mode 100644 index 000000000..b46dec00c --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/sha256" + "math/rand" + "testing" + + crand "crypto/rand" + + "fmt" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/hash" + "github.com/consensys/gnark-crypto/signature" +) + +func Example() { + // instantiate hash function + hFunc := hash.MIMC_BLS12_378.New("seed") + + // create a eddsa key pair + privateKey, _ := signature.EDDSA_BLS12_378.New(crand.Reader) + publicKey := privateKey.Public() + + // note that the message is on 4 bytes + msg := []byte{0xde, 0xad, 0xf0, 0x0d} + + // sign the message + signature, _ := privateKey.Sign(msg, hFunc) + + // verifies signature + isValid, _ := publicKey.Verify(signature, msg, hFunc) + if !isValid { + fmt.Println("1. invalid signature") + } else { + fmt.Println("1. 
valid signature") + } + + // Output: 1. valid signature +} + +func TestSerialization(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + privKey1, err := signature.EDDSA_BLS12_378.New(r) + if err != nil { + t.Fatal(err) + } + pubKey1 := privKey1.Public() + + privKey2, err := signature.EDDSA_BLS12_378.New(r) + if err != nil { + t.Fatal(err) + } + pubKey2 := privKey2.Public() + + pubKeyBin1 := pubKey1.Bytes() + pubKey2.SetBytes(pubKeyBin1) + pubKeyBin2 := pubKey2.Bytes() + if len(pubKeyBin1) != len(pubKeyBin2) { + t.Fatal("Inconistent size") + } + for i := 0; i < len(pubKeyBin1); i++ { + if pubKeyBin1[i] != pubKeyBin2[i] { + t.Fatal("Error serialize(deserialize(.))") + } + } + + privKeyBin1 := privKey1.Bytes() + privKey2.SetBytes(privKeyBin1) + privKeyBin2 := privKey2.Bytes() + if len(privKeyBin1) != len(privKeyBin2) { + t.Fatal("Inconistent size") + } + for i := 0; i < len(privKeyBin1); i++ { + if privKeyBin1[i] != privKeyBin2[i] { + t.Fatal("Error serialize(deserialize(.))") + } + } +} + +func TestEddsaMIMC(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + // create eddsa obj and sign a message + privKey, err := signature.EDDSA_BLS12_378.New(r) + if err != nil { + t.Fatal(nil) + } + pubKey := privKey.Public() + hFunc := hash.MIMC_BLS12_378.New("seed") + + var frMsg fr.Element + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978") + msgBin := frMsg.Bytes() + signature, err := privKey.Sign(msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + + // verifies correct msg + res, err := pubKey.Verify(signature, msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + if !res { + t.Fatal("Verifiy correct signature should return true") + } + + // verifies wrong msg + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035979") + msgBin = frMsg.Bytes() + res, err = pubKey.Verify(signature, msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + if res { + 
t.Fatal("Verfiy wrong signature should be false") + } + +} + +func TestEddsaSHA256(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + hFunc := sha256.New() + + // create eddsa obj and sign a message + // create eddsa obj and sign a message + + privKey, err := signature.EDDSA_BLS12_378.New(r) + pubKey := privKey.Public() + if err != nil { + t.Fatal(err) + } + + signature, err := privKey.Sign([]byte("message"), hFunc) + if err != nil { + t.Fatal(err) + } + + // verifies correct msg + res, err := pubKey.Verify(signature, []byte("message"), hFunc) + if err != nil { + t.Fatal(err) + } + if !res { + t.Fatal("Verifiy correct signature should return true") + } + + // verifies wrong msg + res, err = pubKey.Verify(signature, []byte("wrong_message"), hFunc) + if err != nil { + t.Fatal(err) + } + if res { + t.Fatal("Verfiy wrong signature should be false") + } + +} + +// benchmarks + +func BenchmarkVerify(b *testing.B) { + + src := rand.NewSource(0) + r := rand.New(src) + + hFunc := hash.MIMC_BLS12_378.New("seed") + + // create eddsa obj and sign a message + privKey, err := signature.EDDSA_BLS12_378.New(r) + pubKey := privKey.Public() + if err != nil { + b.Fatal(err) + } + var frMsg fr.Element + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978") + msgBin := frMsg.Bytes() + signature, _ := privKey.Sign(msgBin[:], hFunc) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + pubKey.Verify(signature, msgBin[:], hFunc) + } +} diff --git a/ecc/bls12-378/twistededwards/eddsa/marshal.go b/ecc/bls12-378/twistededwards/eddsa/marshal.go new file mode 100644 index 000000000..c68129087 --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/marshal.go @@ -0,0 +1,133 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "io" +) + +// Bytes returns the binary representation of the public key +// follows https://tools.ietf.org/html/rfc8032#section-3.1 +// and returns a compressed representation of the point (x,y) +// +// x, y are the coordinates of the point +// on the twisted Edwards as big endian integers. +// compressed representation store x with a parity bit to recompute y +func (pk *PublicKey) Bytes() []byte { + var res [sizePublicKey]byte + pkBin := pk.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pkBin[:]) + return res[:] +} + +// SetBytes sets p from binary representation in buf. +// buf represents a public key as x||y where x, y are +// interpreted as big endian binary numbers corresponding +// to the coordinates of a point on the twisted Edwards. +// It returns the number of bytes read from the buffer. +func (pk *PublicKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePublicKey { + return n, io.ErrShortBuffer + } + if _, err := pk.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !pk.A.IsOnCurve() { + return n, errNotOnCurve + } + return n, nil +} + +// Bytes returns the binary representation of pk, +// as byte array publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. 
+func (privKey *PrivateKey) Bytes() []byte { + var res [sizePrivateKey]byte + pubkBin := privKey.PublicKey.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pubkBin[:]) + subtle.ConstantTimeCopy(1, res[sizeFr:2*sizeFr], privKey.scalar[:]) + subtle.ConstantTimeCopy(1, res[2*sizeFr:], privKey.randSrc[:]) + return res[:] +} + +// SetBytes sets pk from buf, where buf is interpreted +// as publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. +// It returns the number byte read. +func (privKey *PrivateKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePrivateKey { + return n, io.ErrShortBuffer + } + if _, err := privKey.PublicKey.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !privKey.PublicKey.A.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, privKey.scalar[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + subtle.ConstantTimeCopy(1, privKey.randSrc[:], buf[2*sizeFr:]) + n += sizeFr + return n, nil +} + +// Bytes returns the binary representation of sig +// as a byte array of size 3*sizeFr x||y||s where +// * x, y are the coordinates of a point on the twisted +// Edwards represented in big endian +// * s=r+h(r,a,m) mod l, the Hasse bound guarantess that +// s is smaller than sizeFr (in particular it is supposed +// s is NOT blinded) +func (sig *Signature) Bytes() []byte { + var res [sizeSignature]byte + sigRBin := sig.R.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], sigRBin[:]) + subtle.ConstantTimeCopy(1, res[sizeFr:], sig.S[:]) + return res[:] +} + +// SetBytes sets sig from a buffer in binary. +// buf is read interpreted as x||y||s where +// * x,y are the coordinates of a point on the twisted +// Edwards represented in big endian +// * s=r+h(r,a,m) mod l, the Hasse bound guarantess that +// s is smaller than sizeFr (in particular it is supposed +// s is NOT blinded) +// It returns the number of bytes read from buf. 
+func (sig *Signature) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizeSignature { + return n, io.ErrShortBuffer + } + if _, err := sig.R.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !sig.R.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, sig.S[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + return n, nil +} diff --git a/ecc/bls12-378/twistededwards/point.go b/ecc/bls12-378/twistededwards/point.go new file mode 100644 index 000000000..8f6c45f7a --- /dev/null +++ b/ecc/bls12-378/twistededwards/point.go @@ -0,0 +1,411 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "crypto/subtle" + "io" + "math/big" + "math/bits" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// PointAffine point on a twisted Edwards curve +type PointAffine struct { + X, Y fr.Element +} + +// PointProj point in projective coordinates +type PointProj struct { + X, Y, Z fr.Element +} + +const ( + //following https://tools.ietf.org/html/rfc8032#section-3.1, + // an fr element x is negative if its binary encoding is + // lexicographically larger than -x. 
+ mCompressedNegative = 0x80 + mCompressedPositive = 0x00 + mUnmask = 0x7f + + // size in byte of a compressed point (point.Y --> fr.Element) + sizePointCompressed = fr.Limbs * 8 +) + +// Bytes returns the compressed point as a byte array +// Follows https://tools.ietf.org/html/rfc8032#section-3.1, +// as the twisted Edwards implementation is primarily used +// for eddsa. +func (p *PointAffine) Bytes() [sizePointCompressed]byte { + + var res [sizePointCompressed]byte + var mask uint + + y := p.Y.Bytes() + + if p.X.LexicographicallyLargest() { + mask = mCompressedNegative + } else { + mask = mCompressedPositive + } + // p.Y must be in little endian + y[0] |= byte(mask) // msb of y + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + y[i], y[j] = y[j], y[i] + } + subtle.ConstantTimeCopy(1, res[:], y[:]) + return res +} + +// Marshal converts p to a byte slice +func (p *PointAffine) Marshal() []byte { + b := p.Bytes() + return b[:] +} + +func computeX(y *fr.Element) (x fr.Element) { + var one, num, den fr.Element + one.SetOne() + num.Square(y) + den.Mul(&num, &edwards.D) + num.Sub(&one, &num) + den.Sub(&edwards.A, &den) + x.Div(&num, &den) + x.Sqrt(&x) + return +} + +// SetBytes sets p from buf +// len(buf) >= sizePointCompressed +// buf contains the Y coordinate masked with a parity bit to recompute the X coordinate +// from the curve equation. See Bytes() and https://tools.ietf.org/html/rfc8032#section-3.1 +// Returns the number of read bytes and an error if the buffer is too short. 
+func (p *PointAffine) SetBytes(buf []byte) (int, error) { + + if len(buf) < sizePointCompressed { + return 0, io.ErrShortBuffer + } + bufCopy := make([]byte, sizePointCompressed) + subtle.ConstantTimeCopy(1, bufCopy, buf[:sizePointCompressed]) + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + bufCopy[i], bufCopy[j] = bufCopy[j], bufCopy[i] + } + isLexicographicallyLargest := (mCompressedNegative&bufCopy[0])>>7 == 1 + bufCopy[0] &= mUnmask + p.Y.SetBytes(bufCopy) + p.X = computeX(&p.Y) + if isLexicographicallyLargest { + if !p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } else { + if p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } + + return sizePointCompressed, nil +} + +// Unmarshal alias to SetBytes() +func (p *PointAffine) Unmarshal(b []byte) error { + _, err := p.SetBytes(b) + return err +} + +// Set sets p to p1 and return it +func (p *PointProj) Set(p1 *PointProj) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.Set(&p1.Z) + return p +} + +// Set sets p to p1 and return it +func (p *PointAffine) Set(p1 *PointAffine) *PointAffine { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + return p +} + +// Equal returns true if p=p1 false otherwise +func (p *PointAffine) Equal(p1 *PointAffine) bool { + return p.X.Equal(&p1.X) && p.Y.Equal(&p1.Y) +} + +// Equal returns true if p=p1 false otherwise +// If one point is on the affine chart Z=0 it returns false +func (p *PointProj) Equal(p1 *PointProj) bool { + if p.Z.IsZero() || p1.Z.IsZero() { + return false + } + var pAffine, p1Affine PointAffine + pAffine.FromProj(p) + p1Affine.FromProj(p1) + return pAffine.Equal(&p1Affine) +} + +// NewPointAffine creates a new instance of PointAffine +func NewPointAffine(x, y fr.Element) PointAffine { + return PointAffine{x, y} +} + +// IsOnCurve checks if a point is on the twisted Edwards curve +func (p *PointAffine) IsOnCurve() bool { + + ecurve := GetEdwardsCurve() + + var lhs, rhs, tmp fr.Element + + tmp.Mul(&p.Y, &p.Y) + lhs.Mul(&p.X, &p.X) + 
mulByA(&lhs) + lhs.Add(&lhs, &tmp) + + tmp.Mul(&p.X, &p.X). + Mul(&tmp, &p.Y). + Mul(&tmp, &p.Y). + Mul(&tmp, &ecurve.D) + rhs.SetOne().Add(&rhs, &tmp) + + return lhs.Equal(&rhs) +} + +// Add adds two points (x,y), (u,v) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Add(p1, p2 *PointAffine) *PointAffine { + + ecurve := GetEdwardsCurve() + + var xu, yv, xv, yu, dxyuv, one, denx, deny fr.Element + pRes := new(PointAffine) + xv.Mul(&p1.X, &p2.Y) + yu.Mul(&p1.Y, &p2.X) + pRes.X.Add(&xv, &yu) + + xu.Mul(&p1.X, &p2.X) + mulByA(&xu) + yv.Mul(&p1.Y, &p2.Y) + pRes.Y.Sub(&yv, &xu) + + dxyuv.Mul(&xv, &yu).Mul(&dxyuv, &ecurve.D) + one.SetOne() + denx.Add(&one, &dxyuv) + deny.Sub(&one, &dxyuv) + + p.X.Div(&pRes.X, &denx) + p.Y.Div(&pRes.Y, &deny) + + return p +} + +// Double doubles point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Double(p1 *PointAffine) *PointAffine { + + p.Set(p1) + var xx, yy, xy, denum, two fr.Element + + xx.Square(&p.X) + yy.Square(&p.Y) + xy.Mul(&p.X, &p.Y) + mulByA(&xx) + denum.Add(&xx, &yy) + + p.X.Double(&xy).Div(&p.X, &denum) + + two.SetOne().Double(&two) + denum.Neg(&denum).Add(&denum, &two) + + p.Y.Sub(&yy, &xx).Div(&p.Y, &denum) + + return p +} + +// Neg negates point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointProj) Neg(p1 *PointProj) *PointProj { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// FromProj sets p in affine from p in projective +func (p *PointAffine) FromProj(p1 *PointProj) *PointAffine { + p.X.Div(&p1.X, &p1.Z) + p.Y.Div(&p1.Y, &p1.Z) + return p +} + +// FromAffine sets p in projective from p in affine +func (p *PointProj) FromAffine(p1 *PointAffine) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.SetOne() + return p +} + +// Add adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-add-2008-bbjlp +func (p *PointProj) Add(p1, p2 *PointProj) 
*PointProj { + + var res PointProj + + ecurve := GetEdwardsCurve() + + var A, B, C, D, E, F, G, H, I fr.Element + A.Mul(&p1.Z, &p2.Z) + B.Square(&A) + C.Mul(&p1.X, &p2.X) + D.Mul(&p1.Y, &p2.Y) + E.Mul(&ecurve.D, &C).Mul(&E, &D) + F.Sub(&B, &E) + G.Add(&B, &E) + H.Add(&p1.X, &p1.Y) + I.Add(&p2.X, &p2.Y) + res.X.Mul(&H, &I). + Sub(&res.X, &C). + Sub(&res.X, &D). + Mul(&res.X, &A). + Mul(&res.X, &F) + mulByA(&C) + C.Neg(&C) + res.Y.Add(&D, &C). + Mul(&res.Y, &A). + Mul(&res.Y, &G) + res.Z.Mul(&F, &G) + + p.Set(&res) + return p +} + +// MixedAdd adds a point in projective to a point in affine coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-madd-2008-bbjlp +func (p *PointProj) MixedAdd(p1 *PointProj, p2 *PointAffine) *PointProj { + + var res PointProj + + ecurve := GetEdwardsCurve() + + var B, C, D, E, F, G, H, I fr.Element + B.Square(&p1.Z) + C.Mul(&p1.X, &p2.X) + D.Mul(&p1.Y, &p2.Y) + E.Mul(&ecurve.D, &C).Mul(&E, &D) + F.Sub(&B, &E) + G.Add(&B, &E) + H.Add(&p1.X, &p1.Y) + I.Add(&p2.X, &p2.Y) + res.X.Mul(&H, &I). + Sub(&res.X, &C). + Sub(&res.X, &D). + Mul(&res.X, &p1.Z). + Mul(&res.X, &F) + mulByA(&C) + res.Y.Sub(&D, &C). + Mul(&res.Y, &p1.Z). + Mul(&res.Y, &G) + res.Z.Mul(&F, &G) + + p.Set(&res) + return p +} + +// Double adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#doubling-dbl-2008-bbjlp +func (p *PointProj) Double(p1 *PointProj) *PointProj { + + var res PointProj + + var B, C, D, E, F, H, J fr.Element + + B.Add(&p1.X, &p1.Y).Square(&B) + C.Square(&p1.X) + D.Square(&p1.Y) + E.Set(&C) + mulByA(&E) + F.Add(&E, &D) + H.Square(&p1.Z) + J.Sub(&F, &H).Sub(&J, &H) + res.X.Sub(&B, &C). + Sub(&res.X, &D). 
+ Mul(&res.X, &J) + res.Y.Sub(&E, &D).Mul(&res.Y, &F) + res.Z.Mul(&F, &J) + + p.Set(&res) + return p +} + +// Neg sets p to -p1 and returns it +func (p *PointAffine) Neg(p1 *PointAffine) *PointAffine { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// setInfinity sets p to O (0:1:1) +func (p *PointProj) setInfinity() *PointProj { + p.X.SetZero() + p.Y.SetOne() + p.Z.SetOne() + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in projective coordinates with a scalar in big.Int +func (p *PointProj) ScalarMul(p1 *PointProj, scalar *big.Int) *PointProj { + + var _scalar big.Int + _scalar.Set(scalar) + p.Set(p1) + if _scalar.Sign() == -1 { + _scalar.Neg(&_scalar) + p.Neg(p) + } + var resProj PointProj + resProj.setInfinity() + const wordSize = bits.UintSize + sWords := _scalar.Bits() + + for i := len(sWords) - 1; i >= 0; i-- { + ithWord := sWords[i] + for k := 0; k < wordSize; k++ { + resProj.Double(&resProj) + kthBit := (ithWord >> (wordSize - 1 - k)) & 1 + if kthBit == 1 { + resProj.Add(&resProj, p) + } + } + } + + p.Set(&resProj) + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in affine coordinates with a scalar in big.Int +func (p *PointAffine) ScalarMul(p1 *PointAffine, scalar *big.Int) *PointAffine { + + var p1Proj, resProj PointProj + p1Proj.FromAffine(p1) + resProj.ScalarMul(&p1Proj, scalar) + p.FromProj(&resProj) + + return p +} diff --git a/ecc/bls12-378/twistededwards/twistededwards_test.go b/ecc/bls12-378/twistededwards/twistededwards_test.go new file mode 100644 index 000000000..cb8e64f26 --- /dev/null +++ b/ecc/bls12-378/twistededwards/twistededwards_test.go @@ -0,0 +1,456 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "math/big" + "math/rand" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + // affine + properties.Property("Equal affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1 PointAffine + p1.Set(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(¶ms.Base) + }, + )) + + properties.Property("Add affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + p3.Set(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.Set(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the 
receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Neg(&p1) + p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + var s big.Int + s.SetUint64(10) + + p2.ScalarMul(&p1, &s) + p1.ScalarMul(&p1, &s) + + return p2.Equal(&p1) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + // proj + properties.Property("Equal projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1, baseProj PointProj + p1.FromAffine(¶ms.Base) + baseProj.FromAffine(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(&baseProj) + }, + )) + + properties.Property("Add projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + p3.FromAffine(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.FromAffine(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Neg(&p1) 
+ p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestField(t *testing.T) { + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS := GenBigInt() + + properties.Property("MulByA(x) should match Mul(x, curve.A)", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var z1, z2 fr.Element + z1.SetBigInt(&s) + z2.Mul(&z1, ¶ms.A) + mulByA(&z1) + + return z1.Equal(&z2) + }, + genS, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS1 := GenBigInt() + genS2 := GenBigInt() + + // affine + properties.Property("(affine) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + p2.Neg(&p1) + + p1.Add(&p1, &p2) + + var one fr.Element + one.SetOne() + + return p1.IsOnCurve() && p1.X.IsZero() && p1.Y.Equal(&one) + }, + genS1, + )) + + properties.Property("(affine) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + p1.ScalarMul(¶ms.Base, &s) + p2.ScalarMul(¶ms.Base, &s) + + p1.Add(&p1, &p2) + p2.Double(&p2) + + return p1.IsOnCurve() && p1.Equal(&p2) && !p1.Equal(&inf) + }, + genS1, + )) + + properties.Property("(affine) [a]P+[b]P = [a+b]P", prop.ForAll( + func(s1, s2 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, p3, inf PointAffine + inf.X.SetZero() + inf.Y.SetZero() + p1.ScalarMul(¶ms.Base, &s1) + p2.ScalarMul(¶ms.Base, &s2) + p3.Set(¶ms.Base) + + p2.Add(&p1, &p2) + + s1.Add(&s1, &s2) + p3.ScalarMul(¶ms.Base, &s1) + + return p2.IsOnCurve() && p3.Equal(&p2) && !p3.Equal(&inf) + }, + genS1, + genS2, + )) + + properties.Property("(affine) [a]P+[-a]P = 
O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + inf.X.SetZero() + inf.Y.SetOne() + p1.ScalarMul(¶ms.Base, &s1) + s1.Neg(&s1) + p2.ScalarMul(¶ms.Base, &s1) + + p2.Add(&p1, &p2) + + return p2.IsOnCurve() && p2.Equal(&inf) + }, + genS1, + )) + + properties.Property("[5]P=[2][2]P+P", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + + five := big.NewInt(5) + p2.Double(&p1).Double(&p2).Add(&p2, &p1) + p1.ScalarMul(&p1, five) + + return p2.IsOnCurve() && p2.Equal(&p1) + }, + genS1, + )) + + // proj + properties.Property("(projective) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p1.ScalarMul(&baseProj, &s1) + p2.Neg(&p1) + + p.Add(&p1, &p2) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(projective) P+P=2*P", prop.ForAll( + + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p.ScalarMul(&baseProj, &s) + + p1.Add(&p, &p) + p2.Double(&p) + + return p1.Equal(&p2) + }, + genS1, + )) + + // mixed + properties.Property("(mixed) P+(-P)=O", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, pProj, p PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + pAffine.Neg(&pAffine) + + p.MixedAdd(&pProj, &pAffine) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(mixed) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, pProj, p, p2 PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + + p.MixedAdd(&pProj, &pAffine) + p2.Double(&pProj) + + 
return p.Equal(&p2) + }, + genS1, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestMarshal(t *testing.T) { + + var point, unmarshalPoint PointAffine + point.Set(&edwards.Base) + for i := 0; i < 20; i++ { + b := point.Marshal() + unmarshalPoint.Unmarshal(b) + if !point.Equal(&unmarshalPoint) { + t.Fatal("error unmarshal(marshal(point))") + } + point.Add(&point, &edwards.Base) + } +} + +// GenBigInt generates a big.Int +// TODO @thomas we use fr size as max bound here +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkScalarMul(b *testing.B) { + params := GetEdwardsCurve() + var a PointProj + var s big.Int + a.FromAffine(¶ms.Base) + s.SetString("52435875175126190479447705081859658376581184513", 10) + s.Add(&s, ¶ms.Order) + + var doubleAndAdd PointProj + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } + }) +} diff --git a/ecc/ecc.go b/ecc/ecc.go index 7f5e531c0..cea2fb10d 100644 --- a/ecc/ecc.go +++ b/ecc/ecc.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc provides bls12-381, bls12-377, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). +// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). 
// // Also // @@ -40,6 +40,7 @@ const ( UNKNOWN ID = iota BN254 BLS12_377 + BLS12_378 BLS12_381 BLS24_315 BW6_761 @@ -48,7 +49,7 @@ const ( // Implemented return the list of curves fully implemented in gnark-crypto func Implemented() []ID { - return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315} + return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315, BW6_633, BLS12_378} } func (id ID) String() string { @@ -56,6 +57,8 @@ func (id ID) String() string { switch id { case BLS12_377: return "bls12_377" + case BLS12_378: + return "bls12_378" case BLS12_381: return "bls12_381" case BN254: @@ -78,6 +81,8 @@ func (id ID) Info() Info { switch id { case BLS12_377: return newInfo(&config.BLS12_377) + case BLS12_378: + return newInfo(&config.BLS12_378) case BLS12_381: return newInfo(&config.BLS12_381) case BN254: diff --git a/internal/generator/addchain/1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 b/internal/generator/addchain/1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 new file mode 100644 index 0000000000000000000000000000000000000000..073bdc04b1163323dae29e87d0d3dfeb44c9b73e GIT binary patch literal 2295 zcmXAp0c4eX9EN}IcV`_lGc(7`%*@Qp%*@Qp%*@R0%uaZM`d=dL7|ZScC-?U*rJJGj zKtxUY?;ld~*D5FZ9e=7Qq+wqp@|kVFFRw;kNh0wp#VY2K2^Fa%lG0E{zLcmuRIUnc zSMu&GmntsRyy|0>YD1-}<16Yz4QdG0@vSS>6lzp+D65uGjas?a7HU>IU)vFCQfH`D zU7>b$hw9Z6s!(sJLw%uM^@qAO5GvDPs6|7eehr7ZG!p95XsAtN;lGscjcPnptI5!W zCPGu13QcP|G^3f&tY$-VnhVWqKD3~P(4rPYOIivoYdN%{mC&kILpkL_d$cFCS9?Q~ z+80{WT4-JCp$%>L5$`uV>B8epH~)X`TS~Wl^Puq@GM>Z6bHsR#8qYD~Ic`D&N>BK3 z(s)i8&uQa1V?1Y#=bZ7JH=YZ|bJ2J%8P8?oxnew5jpv&2TsNK@#&gqzMwH(2;kNPI zF`jMXxobT4jOV`bJTRVz#`DN{9vjaS<9TX4&y44}@w_mem&WtTcwQUN8{>IvLY+$A zg?1`^?^_>?=cDm_GM>-I^Tl|+8qYW5`EEQvjOVBE{4$=r3FTFgOM;<6PZ8&0Br-^p zAW@1$84~44R3K4_L>7rEB&w09L82CkIwb0mXh5P7Lqnb>&do@)Akm6M8xrkEbRf}* zL>Cg>Nc14li$ot1{YVTT!DAXE_<4qiVI)S77{$=IXN)s3PD~&%iNsDMrjVFMVg`v> zB<7HqM`8hqMI@GxSVm$6iB%+W$ZuDk&v)LRa7*k%VhxFPBsP%PkHi5aHj&su;vf=- 
zkT{IQ5hRWxaSVy$NSwgXnCB$tQ%Iaf;tUdJkvNC^@156q&KHolh{Po%E+cUTiK|Fl zL*hCTH;}lA#4RLlBXI|bZ6xj@aSw_6NIXE|Arg;}c#OmoB%UJi42kDRyg=e560eYW zjl>&_!Y}w+&hLlbKACWG2Z>CfS|o&Lm0FO}aC^-As}sy}e12B;6!Q zk|arzZjvNPk|arzBuSDaNs@E!^EuP=`9I&^TG`Io|D3hg|EePXHahvAe^37FkEri2 zQQuceXZ_Eni0Yr)oFYHtAJJSQhVxoV?eG81&#T4%N>%yG=2A^7(Ht!ft)*%!)$|h6 zA}>|UbeUD6CEC?&)lp))s$!1nQr#|dU3y&Rxy&yyKNk3}7nWG)(i^=c`l7GI;#gdw zBbJm{8cR#`M}LWd81Tu^VqAfO-*c6*eY>v$(#$&w1L`;;}5?e}ajjbhS z#I_QXFPNb3De42X@xf1AZXt2l8VFb8;QZ z$#pm<*O8oDM{{x=%PG+r_2YRsk(29WPOeiqrK+FKJd=~_Y)-CoIl0c~tar> zOF6kN=j6JQlj~|uu4_3Zx}ttP4>xjh-OR~#D<{|OoLqNua^1}-(G&H1dAOgG>p@Pg zhdH?(<>Y#tlj})NuBSOAx}*Lq56^RQy~xS+GAGxo9FK7Qbspa2s?N+_c^&f zbnq6^WDn2YE^`1-Lav5V4p&UQQRLF`5BL+nQ!KpaFI zLLA23U5g{mM-j&m#}OwGCz<+*I^}#CaRzY~aSm}FaRG4=aS3r5aRqS|agC`j^Sbj5 z#7)F4#BIbK#9hQaO5+Z=@B9Gq5b+4{81V%0l&OFG%=tOu1>z;*72-AG4dN~09pXLW z1L7m%6XG+aam#&i{)+g9_>TC2_=)(1_>E}%$*39{KTO_LYgIF18lnZ!ifBVjN6bLX zM9jj=XRT^?o{i{0bRy;;x)9xnxriRbJj8swQPiph&I=KX5WR>##A3t}#8N~*#j9O4 z;JgemNWQ{U%bivrh7iMu5yVQwC}Iq;isD7ETJ5|Bu@=8Y)T(vP>k%6e8xfljn-SxP z3H*LitF}0AMQlS%;=9@Iv;(mdu?w*qu?Mjiu@AAIsqf~1^FhQR#9_n{#8JdC#BmyL zZPf|qlZaD@(}**OvxsvPZ)DYZ=L?97h)ameh%1Pzh---Jh#QETG+ybdTh6x;cMx}( zdg|^u-$y(^JVZP~JVrc0JViW1JV(4hyhOZ0yhgl1yhXf2yhnUMd_;Ugd`5ghe5LVP zSABE-j`)H2iTH*1ji`~))ZkLlxLcc=ou?sM5Uq$d#B{_A8owAc&2*lH$fv!jUCc&w zkU!r|olbKQU5IYPTtp9I9%4RX0b(Iy5uz8-hgghQf>?^^M+_jAAqEl45i1Zwh+)JC lVkKe}F@{)$SdCbNSc_PPSdZ9%*ofGK*o+uQOdz&U{tunqTR;E+ literal 0 HcmV?d00001 diff --git a/internal/generator/addchain/41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 b/internal/generator/addchain/41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 new file mode 100644 index 0000000000000000000000000000000000000000..e40bf61a54cf030d9274511ce49ccfa702b4e4ef GIT binary patch literal 1907 zcmXAp4@6yY97n&Ox3naaBuSDaNs=TD|q~FZs(GsYv;aOl9(t70OU{q;f)8@=LaIL%GW1az1w# zP=&nVDpYYOM&QLk;Q*wWvE(qn=PF*SggkDpr4}Pko^Q4TJ_Y z7#h-0XjsFc5sidKH5wYzSZG}1p$ScdCN&wF(o|?#)1ev7gl07xI-(aLIFrJIX 
zbIEuv8_yNvxoSMujOV)X+%TS-#&gSfZX3@X*m#~8 z&r{=hW<1Z0=Y{dC8PB@$yfmIy#`D^E-WbnY<9TO1?~P}}cs7mag9+6u`54--Bx;bTMWPOgdL$Z%pI>5@(P& zi^Mr3&Lgpcp&rjF=L<+&MB)+>myx)F{L@bBD(7oRTu0&t5;u{!g~V+n?jUg&iF-)g zN8$kz50Q9;#A76$An_E5XGlCp;sp|GNUS6A5{Xwxyhh>;5^s@shs1j%HjvoF*#3fl f;QSGZPe^=5;tLXAk@$u?PxL$IA4vQ}VhiIxt!Ze) literal 0 HcmV?d00001 diff --git a/internal/generator/addchain/fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 b/internal/generator/addchain/fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 new file mode 100644 index 0000000000000000000000000000000000000000..83c675ccdee7f4ede5c044d34243d6576b7b41e9 GIT binary patch literal 3158 zcmXBW0f>!#7Qo?i-kIz)O_L@~lB^_2(wC$!Ns?qGJ4uoxD_O}}O_C&8D@j(8BuSDa zNs=TIdA^Djfg|NiIjKmL{N{gLf` z5#!tc>B?;WzRfA?Gyay{k-4O<5v{(z*5|eIf6+AlvOAhlk=@zj&>Kx(H2smIvMx;y zxQvPH$w4(%jf?D8O-@h~)g+h6E>m2lx=f3lmec*$Ga_fW%*>gQvvOAC?3^7rF8_?2 zlXD{H=G@46InO80k6e%oA{XYu$VIs*a&az>`r?;FF3qLUEQ=h@h+LT~ zBUk0B$kn+za!sy@T$^hn*X6p%zFZ%ts!>Q#H}FPgkC)sdcud*14Km=WA+RsHt_arq-pJT9<2T zU8$*cwWij!n#hUSzFvnLHMMTm)Vftu>vm18J2kcL)n#f7nepZL)HML&U)OuM{>s5_Mxc#~gZ)$42t*P~{rq=tKS|4g^eXNP> z&Gx4{e6FeWrKZ-`np)p#YJIP%^`oZN&zf4lYHIzisnx29Y-NW_L*&$2-Oi&BJ&0aJ zAEF;In#h4#1I}X*gNU(+aftDV35bb^Nr=gaDF|QRRJTvdUDV3yh#81K5Hk_85VLUy zx7=T?y2FZ?ikg>-T4OMCgK+2 zHsTKAF5({1xdZMyKR`T0JVHE1JV88V5uHC3O-JX4$-8Q+=|+q~^dNc>eTaUB4 z&7kvG#5lxw#011d#3aOI#1zC-#5BB7w3_M8GZ23uW+G-GW+VPY%t6d0yxKMMoaZAJ zP_Hn}LZ?ND#fT+{rHEySA;faT3c`zEv(kALVl{q?XfBCa8>BW@sW(s`w8ZaLpZ+(F!B`=u@euI{@fh(0@f7h4@f`62@e=V0@fz_4@fPt8@gDI3@e%O}@fq<2@s-YNUGvTP zJK_i8C*l|4H=;#FSBFbO=Wgxlb{>W3LG&W}5dDbJbbc}D8gL$isHeSaP>d!153Y#u A2><{9 literal 0 HcmV?d00001 diff --git a/internal/generator/config/bls12-378.go b/internal/generator/config/bls12-378.go new file mode 100644 index 000000000..4fb9eddfe --- /dev/null +++ b/internal/generator/config/bls12-378.go @@ -0,0 +1,29 @@ +package config + +var BLS12_378 = Curve{ + Name: "bls12-378", + CurvePackage: "bls12378", + 
EnumID: "BLS12_378", + FrModulus: "14883435066912132899950318861128167269793560281114003360875131245101026639873", + FpModulus: "605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + }, + G2: Point{ + CoordType: "fptower.E2", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + Projective: true, + }, +} + +func init() { + addCurve(&BLS12_378) + +} diff --git a/internal/generator/ecc/template/point.go.tmpl b/internal/generator/ecc/template/point.go.tmpl index 4b0aa9e73..1c7eae8a3 100644 --- a/internal/generator/ecc/template/point.go.tmpl +++ b/internal/generator/ecc/template/point.go.tmpl @@ -508,7 +508,7 @@ func (p *{{ $TJacobian }}) IsOnCurve() bool { return res.IsOnCurve() && res.Z.IsZero() } - {{else if eq .Name "bls12-377"}} + {{else if or (eq .Name "bls12-377") (eq .Name "bls12-378")}} // https://eprint.iacr.org/2021/1130.pdf, sec.4 // psi(p) = u*P func (p *{{ $TJacobian }}) IsInSubGroup() bool { @@ -676,7 +676,7 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { res.ScalarMultiplication(a, &xGen).AddAssign(a) p.Set(&res) return p -{{else if eq .Name "bls12-377"}} +{{else if or (eq .Name "bls12-377") (eq .Name "bls12-378")}} // cf https://eprint.iacr.org/2019/403.pdf, 5 var res {{$TJacobian}} res.ScalarMultiplication(a, &xGen).Neg(&res).AddAssign(a) @@ -802,7 +802,7 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { return p -{{else if eq .Name "bls12-377"}} +{{else if or (eq .Name "bls12-377") (eq .Name "bls12-378")}} // https://eprint.iacr.org/2017/419.pdf, 4.1 var xg, xxg, res, t G2Jac xg.ScalarMultiplication(a, &xGen) From 0d302839c8fce63e52dfd9ef181f3ab006a76448 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 20 Dec 2021 13:47:38 +0100 Subject: [PATCH 02/29] 
fix(bls12-378): set root of unity for FFT --- ecc/bls12-378/fr/fft/domain.go | 7 +++++++ ecc/bls12-378/fr/fft/fft.go | 2 ++ ecc/bls12-378/fr/fft/fft_test.go | 2 ++ ecc/bls12-378/fr/fft/fuzz.go | 2 ++ internal/generator/fft/template/domain.go.tmpl | 7 ++++--- internal/generator/fft/template/imports.go.tmpl | 8 ++++++-- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/ecc/bls12-378/fr/fft/domain.go b/ecc/bls12-378/fr/fft/domain.go index 97ec9125e..ac953fafd 100644 --- a/ecc/bls12-378/fr/fft/domain.go +++ b/ecc/bls12-378/fr/fft/domain.go @@ -24,6 +24,10 @@ import ( "runtime" "sync" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + + curve "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc" ) @@ -80,6 +84,9 @@ func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { // generator of the largest 2-adic subgroup var rootOfUnity fr.Element + rootOfUnity.SetString("4045585818372166415418670827807793147093034396422209590578257013290761627990") + const maxOrderRoot uint64 = 42 + domain := &Domain{} x := ecc.NextPowerOfTwo(m) domain.Cardinality = uint64(x) diff --git a/ecc/bls12-378/fr/fft/fft.go b/ecc/bls12-378/fr/fft/fft.go index 66f299d78..532ed4e34 100644 --- a/ecc/bls12-378/fr/fft/fft.go +++ b/ecc/bls12-378/fr/fft/fft.go @@ -22,6 +22,8 @@ import ( "github.com/consensys/gnark-crypto/ecc" "github.com/consensys/gnark-crypto/internal/parallel" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" ) // Decimation is used in the FFT call to select decimation in time or in frequency diff --git a/ecc/bls12-378/fr/fft/fft_test.go b/ecc/bls12-378/fr/fft/fft_test.go index c7416fff7..39e067af8 100644 --- a/ecc/bls12-378/fr/fft/fft_test.go +++ b/ecc/bls12-378/fr/fft/fft_test.go @@ -21,6 +21,8 @@ import ( "strconv" "testing" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" "github.com/leanovate/gopter/gen" "github.com/leanovate/gopter/prop" diff --git 
a/ecc/bls12-378/fr/fft/fuzz.go b/ecc/bls12-378/fr/fft/fuzz.go index 1c25b2420..8beef8c6b 100644 --- a/ecc/bls12-378/fr/fft/fuzz.go +++ b/ecc/bls12-378/fr/fft/fuzz.go @@ -23,6 +23,8 @@ import ( "bytes" "fmt" "github.com/consensys/gnark-crypto/ecc" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" ) const ( diff --git a/internal/generator/fft/template/domain.go.tmpl b/internal/generator/fft/template/domain.go.tmpl index 7eb038518..269adacb0 100644 --- a/internal/generator/fft/template/domain.go.tmpl +++ b/internal/generator/fft/template/domain.go.tmpl @@ -63,7 +63,10 @@ func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { // generator of the largest 2-adic subgroup var rootOfUnity fr.Element - {{if eq .Name "bls12-377"}} + {{if eq .Name "bls12-378"}} + rootOfUnity.SetString("4045585818372166415418670827807793147093034396422209590578257013290761627990") + const maxOrderRoot uint64 = 42 + {{else if eq .Name "bls12-377"}} rootOfUnity.SetString("8065159656716812877374967518403273466521432693661810619979959746626482506078") const maxOrderRoot uint64 = 47 {{else if eq .Name "bls12-381"}} @@ -294,5 +297,3 @@ func (d *Domain) ReadFrom(r io.Reader) (int64, error) { return dec.BytesRead(), nil } - - diff --git a/internal/generator/fft/template/imports.go.tmpl b/internal/generator/fft/template/imports.go.tmpl index 70336db5d..f2b26bcc7 100644 --- a/internal/generator/fft/template/imports.go.tmpl +++ b/internal/generator/fft/template/imports.go.tmpl @@ -1,6 +1,8 @@ {{ define "import_fr" }} -{{ if eq .Name "bls12-377"}} +{{ if eq .Name "bls12-378"}} + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +{{ else if eq .Name "bls12-377"}} "github.com/consensys/gnark-crypto/ecc/bls12-377/fr" {{ else if eq .Name "bls12-381"}} "github.com/consensys/gnark-crypto/ecc/bls12-381/fr" @@ -17,7 +19,9 @@ {{end}} {{ define "import_curve" }} -{{if eq .Name "bls12-377"}} +{{if eq .Name "bls12-378"}} + curve "github.com/consensys/gnark-crypto/ecc/bls12-378" +{{else if 
eq .Name "bls12-377"}} curve "github.com/consensys/gnark-crypto/ecc/bls12-377" {{else if eq .Name "bls12-381"}} curve "github.com/consensys/gnark-crypto/ecc/bls12-381" From 2ef0d3616904d644ff5972001ef03248757adb98 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 21 Dec 2021 16:14:57 +0100 Subject: [PATCH 03/29] build: add bls12-378 to kzg and hash --- hash/hashes.go | 7 +++++++ internal/apicheck_test.go | 5 +++++ kzg/kzg.go | 3 +++ 3 files changed, 15 insertions(+) diff --git a/hash/hashes.go b/hash/hashes.go index 1845ce25a..01b9b86ae 100644 --- a/hash/hashes.go +++ b/hash/hashes.go @@ -21,6 +21,7 @@ import ( "hash" bls377 "github.com/consensys/gnark-crypto/ecc/bls12-377/fr/mimc" + bls378 "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/mimc" bls381 "github.com/consensys/gnark-crypto/ecc/bls12-381/fr/mimc" bls315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/mimc" bn254 "github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc" @@ -34,6 +35,7 @@ const ( MIMC_BN254 Hash = iota MIMC_BLS12_381 MIMC_BLS12_377 + MIMC_BLS12_378 MIMC_BW6_761 MIMC_BLS24_315 MIMC_BW6_633 @@ -44,6 +46,7 @@ var digestSize = []uint8{ MIMC_BN254: 32, MIMC_BLS12_381: 48, MIMC_BLS12_377: 48, + MIMC_BLS12_378: 48, MIMC_BW6_761: 96, MIMC_BLS24_315: 48, MIMC_BW6_633: 80, @@ -58,6 +61,8 @@ func (m Hash) New(seed string) hash.Hash { return bls381.NewMiMC(seed) case MIMC_BLS12_377: return bls377.NewMiMC(seed) + case MIMC_BLS12_378: + return bls378.NewMiMC(seed) case MIMC_BW6_761: return bw761.NewMiMC(seed) case MIMC_BLS24_315: @@ -78,6 +83,8 @@ func (m Hash) String() string { return "MIMC_BLS381" case MIMC_BLS12_377: return "MIMC_BLS377" + case MIMC_BLS12_378: + return "MIMC_BLS378" case MIMC_BW6_761: return "MIMC_BW761" case MIMC_BLS24_315: diff --git a/internal/apicheck_test.go b/internal/apicheck_test.go index eebf5e3f6..384c138d1 100644 --- a/internal/apicheck_test.go +++ b/internal/apicheck_test.go @@ -2,6 +2,7 @@ package main import ( bls377 
"github.com/consensys/gnark-crypto/ecc/bls12-377" + bls378 "github.com/consensys/gnark-crypto/ecc/bls12-378" bls381 "github.com/consensys/gnark-crypto/ecc/bls12-381" "github.com/consensys/gnark-crypto/ecc/bn254" bw761 "github.com/consensys/gnark-crypto/ecc/bw6-761" @@ -14,6 +15,7 @@ var err error var ( gtbls377 bls377.GT + gtbls378 bls378.GT gtbls381 bls381.GT gtbn254 bn254.GT gtbw761 bw761.GT @@ -22,18 +24,21 @@ var ( func init() { // Pair gtbls377, err = bls377.Pair([]bls377.G1Affine{}, []bls377.G2Affine{}) + gtbls378, err = bls378.Pair([]bls378.G1Affine{}, []bls378.G2Affine{}) gtbls381, err = bls381.Pair([]bls381.G1Affine{}, []bls381.G2Affine{}) gtbn254, err = bn254.Pair([]bn254.G1Affine{}, []bn254.G2Affine{}) gtbw761, err = bw761.Pair([]bw761.G1Affine{}, []bw761.G2Affine{}) // MillerLoop gtbls377, err = bls377.MillerLoop([]bls377.G1Affine{}, []bls377.G2Affine{}) + gtbls378, err = bls378.MillerLoop([]bls378.G1Affine{}, []bls378.G2Affine{}) gtbls381, err = bls381.MillerLoop([]bls381.G1Affine{}, []bls381.G2Affine{}) gtbn254, err = bn254.MillerLoop([]bn254.G1Affine{}, []bn254.G2Affine{}) gtbw761, err = bw761.MillerLoop([]bw761.G1Affine{}, []bw761.G2Affine{}) // FinalExp gtbls377 = bls377.FinalExponentiation(>bls377) + gtbls378 = bls378.FinalExponentiation(>bls378) gtbls381 = bls381.FinalExponentiation(>bls381) gtbn254 = bn254.FinalExponentiation(>bn254) gtbw761 = bw761.FinalExponentiation(>bw761) diff --git a/kzg/kzg.go b/kzg/kzg.go index 88738ccae..2da1e58c5 100644 --- a/kzg/kzg.go +++ b/kzg/kzg.go @@ -9,6 +9,7 @@ import ( "github.com/consensys/gnark-crypto/ecc" kzg_bls12377 "github.com/consensys/gnark-crypto/ecc/bls12-377/fr/kzg" + kzg_bls12378 "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" kzg_bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381/fr/kzg" kzg_bls24315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/kzg" kzg_bn254 "github.com/consensys/gnark-crypto/ecc/bn254/fr/kzg" @@ -29,6 +30,8 @@ func NewSRS(curveID ecc.ID) SRS { return 
&kzg_bn254.SRS{} case ecc.BLS12_377: return &kzg_bls12377.SRS{} + case ecc.BLS12_378: + return &kzg_bls12378.SRS{} case ecc.BLS12_381: return &kzg_bls12381.SRS{} case ecc.BLS24_315: From ec341a37768d1f2e315310b0f49da96a41259792 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Sat, 25 Dec 2021 19:58:11 +0100 Subject: [PATCH 04/29] feat(bls12-378): add companion twisted edwards to GT-strong BLS12-378 --- .../twistededwards/twistededwards.go | 64 +++++++++++++++++++ signature/signature.go | 1 + 2 files changed, 65 insertions(+) create mode 100644 ecc/bls12-378/twistededwards/twistededwards.go diff --git a/ecc/bls12-378/twistededwards/twistededwards.go b/ecc/bls12-378/twistededwards/twistededwards.go new file mode 100644 index 000000000..e2f98390c --- /dev/null +++ b/ecc/bls12-378/twistededwards/twistededwards.go @@ -0,0 +1,64 @@ +/* +Copyright © 2020 ConsenSys + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package twistededwards + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 +type CurveParams struct { + A, D fr.Element // in Montgomery form + Cofactor fr.Element // not in Montgomery form + Order big.Int + Base PointAffine +} + +var edwards CurveParams + +// GetEdwardsCurve returns the twisted Edwards curve on BLS12-378's Fr +func GetEdwardsCurve() CurveParams { + + // copy to keep Order private + var res CurveParams + + res.A.Set(&edwards.A) + res.D.Set(&edwards.D) + res.Cofactor.Set(&edwards.Cofactor) + res.Order.Set(&edwards.Order) + res.Base.Set(&edwards.Base) + + return res +} + +func init() { + + edwards.A.SetString("1169928") + edwards.D.SetString("1169924") + edwards.Cofactor.SetUint64(8).FromMont() + edwards.Order.SetString("1860429383364016612493789857641020908721690454530426945748883177201355593303", 10) + + edwards.Base.X.SetString("4274983589151226901853657690021194631121133716096168671136076068148698830183") + edwards.Base.Y.SetString("9922290044608088599966879240752111513195706854076002240583420830067351093249") +} + +// mulByA multiplies fr.Element by edwards.A +func mulByA(x *fr.Element) { + x.Mul(x, &edwards.A) +} diff --git a/signature/signature.go b/signature/signature.go index 31cceb81b..df823b198 100644 --- a/signature/signature.go +++ b/signature/signature.go @@ -81,6 +81,7 @@ const ( EDDSA_BN254 SignatureScheme = iota EDDSA_BLS12_381 EDDSA_BLS12_377 + EDDSA_BLS12_378 EDDSA_BW6_761 EDDSA_BLS24_315 EDDSA_BW6_633 From 7c52c9d69b125d35948739507afc4d2a14c453ee Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Sat, 25 Dec 2021 20:06:48 +0100 Subject: [PATCH 05/29] fix: increment maxSignatures --- signature/signature.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signature/signature.go b/signature/signature.go index df823b198..f736ea653 100644 --- a/signature/signature.go +++ b/signature/signature.go @@ -75,7 +75,7 @@ 
type Signer interface { type SignatureScheme uint -const maxSignatures = 6 +const maxSignatures = 7 const ( EDDSA_BN254 SignatureScheme = iota From ea8ec7c854c19621703da8a9f47c649ce6963b53 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 28 Dec 2021 11:18:28 +0100 Subject: [PATCH 06/29] feat: add bw6-756 (2-chain w/ bls12-378 GT-strong) --- ecc/bw6-756/bw6-756.go | 120 + ecc/bw6-756/doc.go | 18 + ecc/bw6-756/fp/arith.go | 60 + ecc/bw6-756/fp/asm.go | 24 + ecc/bw6-756/fp/asm_noadx.go | 25 + ecc/bw6-756/fp/bw6_utils.go | 27 + ecc/bw6-756/fp/doc.go | 43 + ecc/bw6-756/fp/element.go | 2722 ++++++++++++++++ ecc/bw6-756/fp/element_exp.go | 1993 ++++++++++++ ecc/bw6-756/fp/element_fuzz.go | 200 ++ ecc/bw6-756/fp/element_mul_adx_amd64.s | 2739 ++++++++++++++++ ecc/bw6-756/fp/element_mul_amd64.s | 2759 ++++++++++++++++ ecc/bw6-756/fp/element_ops_amd64.go | 50 + ecc/bw6-756/fp/element_ops_amd64.s | 746 +++++ ecc/bw6-756/fp/element_ops_noasm.go | 78 + ecc/bw6-756/fp/element_test.go | 2777 +++++++++++++++++ ecc/bw6-756/fr/arith.go | 60 + ecc/bw6-756/fr/asm.go | 24 + ecc/bw6-756/fr/asm_noadx.go | 25 + ecc/bw6-756/fr/doc.go | 43 + ecc/bw6-756/fr/element.go | 1720 ++++++++++ ecc/bw6-756/fr/element_exp.go | 1040 ++++++ ecc/bw6-756/fr/element_fuzz.go | 152 + ecc/bw6-756/fr/element_mul_adx_amd64.s | 836 +++++ ecc/bw6-756/fr/element_mul_amd64.s | 858 +++++ ecc/bw6-756/fr/element_ops_amd64.go | 50 + ecc/bw6-756/fr/element_ops_amd64.s | 452 +++ ecc/bw6-756/fr/element_ops_noasm.go | 78 + ecc/bw6-756/fr/element_test.go | 2681 ++++++++++++++++ ecc/bw6-756/fr/fft/doc.go | 18 + ecc/bw6-756/fr/fft/domain.go | 300 ++ ecc/bw6-756/fr/fft/domain_test.go | 47 + ecc/bw6-756/fr/fft/fft.go | 319 ++ ecc/bw6-756/fr/fft/fft_test.go | 415 +++ ecc/bw6-756/fr/fft/fuzz.go | 74 + ecc/bw6-756/fr/fft/fuzz_test.go | 56 + ecc/bw6-756/fr/kzg/doc.go | 18 + ecc/bw6-756/fr/kzg/fuzz.go | 84 + ecc/bw6-756/fr/kzg/fuzz_test.go | 56 + ecc/bw6-756/fr/kzg/kzg.go | 518 +++ ecc/bw6-756/fr/kzg/kzg_test.go | 453 +++ 
ecc/bw6-756/fr/kzg/marshal.go | 138 + ecc/bw6-756/fr/mimc/doc.go | 18 + ecc/bw6-756/fr/mimc/fuzz.go | 34 + ecc/bw6-756/fr/mimc/mimc.go | 174 ++ ecc/bw6-756/fr/permutation/doc.go | 18 + ecc/bw6-756/fr/permutation/permutation.go | 361 +++ .../fr/permutation/permutation_test.go | 94 + ecc/bw6-756/fr/plookup/doc.go | 18 + ecc/bw6-756/fr/plookup/plookup_test.go | 139 + ecc/bw6-756/fr/plookup/table.go | 252 ++ ecc/bw6-756/fr/plookup/vector.go | 687 ++++ ecc/bw6-756/fr/polynomial/doc.go | 18 + ecc/bw6-756/fr/polynomial/polynomial.go | 123 + ecc/bw6-756/fr/polynomial/polynomial_test.go | 208 ++ ecc/bw6-756/fuzz.go | 76 + ecc/bw6-756/fuzz_test.go | 56 + ecc/bw6-756/g1.go | 1081 +++++++ ecc/bw6-756/g1_test.go | 664 ++++ ecc/bw6-756/g2.go | 933 ++++++ ecc/bw6-756/g2_test.go | 664 ++++ ecc/bw6-756/hash_to_curve.go | 262 ++ ecc/bw6-756/internal/fptower/e3.go | 299 ++ ecc/bw6-756/internal/fptower/e3_test.go | 330 ++ ecc/bw6-756/internal/fptower/e6.go | 412 +++ ecc/bw6-756/internal/fptower/e6_pairing.go | 127 + ecc/bw6-756/internal/fptower/e6_test.go | 387 +++ ecc/bw6-756/internal/fptower/frobenius.go | 102 + .../internal/fptower/generators_test.go | 43 + ecc/bw6-756/marshal.go | 1155 +++++++ ecc/bw6-756/marshal_test.go | 457 +++ ecc/bw6-756/multiexp.go | 983 ++++++ ecc/bw6-756/multiexp_test.go | 701 +++++ ecc/bw6-756/pairing.go | 366 +++ ecc/bw6-756/pairing_test.go | 306 ++ ecc/ecc.go | 9 +- ecc/ecc.md | 2 + ...60554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 | Bin 0 -> 6633 bytes ...f43429276019e3f31fc34200000000000000000000 | Bin 0 -> 7376 bytes internal/generator/config/bw6-756.go | 28 + internal/generator/tower/generate.go | 2 +- 81 files changed, 36482 insertions(+), 3 deletions(-) create mode 100644 ecc/bw6-756/bw6-756.go create mode 100644 ecc/bw6-756/doc.go create mode 100644 ecc/bw6-756/fp/arith.go create mode 100644 ecc/bw6-756/fp/asm.go create mode 100644 ecc/bw6-756/fp/asm_noadx.go create mode 100644 ecc/bw6-756/fp/bw6_utils.go create mode 100644 ecc/bw6-756/fp/doc.go 
create mode 100644 ecc/bw6-756/fp/element.go create mode 100644 ecc/bw6-756/fp/element_exp.go create mode 100644 ecc/bw6-756/fp/element_fuzz.go create mode 100644 ecc/bw6-756/fp/element_mul_adx_amd64.s create mode 100644 ecc/bw6-756/fp/element_mul_amd64.s create mode 100644 ecc/bw6-756/fp/element_ops_amd64.go create mode 100644 ecc/bw6-756/fp/element_ops_amd64.s create mode 100644 ecc/bw6-756/fp/element_ops_noasm.go create mode 100644 ecc/bw6-756/fp/element_test.go create mode 100644 ecc/bw6-756/fr/arith.go create mode 100644 ecc/bw6-756/fr/asm.go create mode 100644 ecc/bw6-756/fr/asm_noadx.go create mode 100644 ecc/bw6-756/fr/doc.go create mode 100644 ecc/bw6-756/fr/element.go create mode 100644 ecc/bw6-756/fr/element_exp.go create mode 100644 ecc/bw6-756/fr/element_fuzz.go create mode 100644 ecc/bw6-756/fr/element_mul_adx_amd64.s create mode 100644 ecc/bw6-756/fr/element_mul_amd64.s create mode 100644 ecc/bw6-756/fr/element_ops_amd64.go create mode 100644 ecc/bw6-756/fr/element_ops_amd64.s create mode 100644 ecc/bw6-756/fr/element_ops_noasm.go create mode 100644 ecc/bw6-756/fr/element_test.go create mode 100644 ecc/bw6-756/fr/fft/doc.go create mode 100644 ecc/bw6-756/fr/fft/domain.go create mode 100644 ecc/bw6-756/fr/fft/domain_test.go create mode 100644 ecc/bw6-756/fr/fft/fft.go create mode 100644 ecc/bw6-756/fr/fft/fft_test.go create mode 100644 ecc/bw6-756/fr/fft/fuzz.go create mode 100644 ecc/bw6-756/fr/fft/fuzz_test.go create mode 100644 ecc/bw6-756/fr/kzg/doc.go create mode 100644 ecc/bw6-756/fr/kzg/fuzz.go create mode 100644 ecc/bw6-756/fr/kzg/fuzz_test.go create mode 100644 ecc/bw6-756/fr/kzg/kzg.go create mode 100644 ecc/bw6-756/fr/kzg/kzg_test.go create mode 100644 ecc/bw6-756/fr/kzg/marshal.go create mode 100644 ecc/bw6-756/fr/mimc/doc.go create mode 100644 ecc/bw6-756/fr/mimc/fuzz.go create mode 100644 ecc/bw6-756/fr/mimc/mimc.go create mode 100644 ecc/bw6-756/fr/permutation/doc.go create mode 100644 ecc/bw6-756/fr/permutation/permutation.go create 
mode 100644 ecc/bw6-756/fr/permutation/permutation_test.go create mode 100644 ecc/bw6-756/fr/plookup/doc.go create mode 100644 ecc/bw6-756/fr/plookup/plookup_test.go create mode 100644 ecc/bw6-756/fr/plookup/table.go create mode 100644 ecc/bw6-756/fr/plookup/vector.go create mode 100644 ecc/bw6-756/fr/polynomial/doc.go create mode 100644 ecc/bw6-756/fr/polynomial/polynomial.go create mode 100644 ecc/bw6-756/fr/polynomial/polynomial_test.go create mode 100644 ecc/bw6-756/fuzz.go create mode 100644 ecc/bw6-756/fuzz_test.go create mode 100644 ecc/bw6-756/g1.go create mode 100644 ecc/bw6-756/g1_test.go create mode 100644 ecc/bw6-756/g2.go create mode 100644 ecc/bw6-756/g2_test.go create mode 100644 ecc/bw6-756/hash_to_curve.go create mode 100644 ecc/bw6-756/internal/fptower/e3.go create mode 100644 ecc/bw6-756/internal/fptower/e3_test.go create mode 100644 ecc/bw6-756/internal/fptower/e6.go create mode 100644 ecc/bw6-756/internal/fptower/e6_pairing.go create mode 100644 ecc/bw6-756/internal/fptower/e6_test.go create mode 100644 ecc/bw6-756/internal/fptower/frobenius.go create mode 100644 ecc/bw6-756/internal/fptower/generators_test.go create mode 100644 ecc/bw6-756/marshal.go create mode 100644 ecc/bw6-756/marshal_test.go create mode 100644 ecc/bw6-756/multiexp.go create mode 100644 ecc/bw6-756/multiexp_test.go create mode 100644 ecc/bw6-756/pairing.go create mode 100644 ecc/bw6-756/pairing_test.go create mode 100644 internal/generator/addchain/1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 create mode 100644 internal/generator/addchain/7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 create mode 100644 internal/generator/config/bw6-756.go diff --git a/ecc/bw6-756/bw6-756.go b/ecc/bw6-756/bw6-756.go new file 
mode 100644 index 000000000..7b0d98fee --- /dev/null +++ b/ecc/bw6-756/bw6-756.go @@ -0,0 +1,120 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bw6756 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// E: y**2=x**3+1 +// Etwist: y**2 = x**3+33 +// Tower: Fp->Fp6, u**6=33 +// Generator (same as BLS378): x=11045256207009841153 +// optimal Ate loops: x+1, x**2-x-1 +// Fp: p=366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 +// Fr: r=605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 + +// ID BW6_756 ID +const ID = ecc.BW6_756 + +// bCurveCoeff b coeff of the curve +var bCurveCoeff fp.Element + +// bTwistCurveCoeff b coeff of the twist (defined over Fp) curve +var bTwistCurveCoeff fp.Element + +// generators of the r-torsion group, resp. 
in ker(pi-id), ker(Tr) +var g1Gen G1Jac +var g2Gen G2Jac + +var g1GenAff G1Affine +var g2GenAff G2Affine + +// point at infinity +var g1Infinity G1Jac +var g2Infinity G2Jac + +// optimal Ate loop counters +var loopCounter0 [191]int8 +var loopCounter1 [191]int8 + +// Parameters useful for the GLV scalar multiplication. The third roots define the +// endomorphisms phi1 and phi2 for and . lambda is such that lies above +// in the ring Z[phi]. More concretely it's the associated eigenvalue +// of phi1 (resp phi2) restricted to (resp ) +// cf https://www.cosic.esat.kuleuven.be/nessie/reports/phase2/GLV.pdf +var thirdRootOneG1 fp.Element +var thirdRootOneG2 fp.Element +var lambdaGLV big.Int + +// glvBasis stores R-linearly independant vectors (a,b), (c,d) +// in ker((u,v)->u+vlambda[r]), and their determinant +var glvBasis ecc.Lattice + +// generator of the curve +var xGen big.Int + +func init() { + + bCurveCoeff.SetOne() + bTwistCurveCoeff.MulByNonResidue(&bCurveCoeff) + + // E(3,y) * cofactor + g1Gen.X.SetString("286035407532233812057489253822435660910062665263942803649298092690795938518721117964189338863504082781482751182899097859005716378386344565362972291164604792882058761734674709131229927253172681714645554597102571818586966737895501") + g1Gen.Y.SetString("250540671634276190125882738767359258920233951524378923555904955920886135268516617166458911260101792169356480449980342047600821278990712908224386045486820019065641642853528653616206514851361917670279865872746658429844440125628329") + g1Gen.Z.SetString("1") + + // E(1,y) * cofactor + g2Gen.X.SetString("270164867145533700243149075881223225204067215320977230235816769808318087164726583740674261721395147407122688542569094772405350936550575160051166652281373572919753182191250641388443572739372443497834910784618354592418817138212395") + 
g2Gen.Y.SetString("296695446824796322573519291690935001172593568823998954880196613542512471119971074118215403545906873458039024520146929054366200365532511334310660691775675887531695313103875249166779149013653038059140912965769351316868363001510735") + g2Gen.Z.SetString("1") + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + // xGen+1 + loopCounter0 = [191]int8{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, -1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + + // xGen^3-xGen^2-xGen + T, _ := new(big.Int).SetString("1347495683935914696108087318582641220368021451587784278015", 10) + ecc.NafDecomposition(T, loopCounter1[:]) + + g1Infinity.X.SetOne() + g1Infinity.Y.SetOne() + g2Infinity.X.SetOne() + g2Infinity.Y.SetOne() + + thirdRootOneG2.SetString("99497571833115712246976573293861816254377473715694998268521440373748988342600853091641405554217584221455319677515385376103078837731420131015700054219263015095146628991433981753068027965212839748934246550470657") + thirdRootOneG1.Square(&thirdRootOneG2) + lambdaGLV.SetString("164391353554439166353793911729193406645071739502673898176639736370075683438438023898983435337729", 10) // (x**5-3*x**4+3*x**3-x+1) + _r := fr.Modulus() + ecc.PrecomputeLattice(_r, &lambdaGLV, &glvBasis) + + xGen.SetString("11045256207009841153", 10) + +} + +// Generators return the generators of the r-torsion group, resp. 
in ker(pi-id), ker(Tr) +func Generators() (g1Jac G1Jac, g2Jac G2Jac, g1Aff G1Affine, g2Aff G2Affine) { + g1Aff = g1GenAff + g2Aff = g2GenAff + g1Jac = g1Gen + g2Jac = g2Gen + return +} diff --git a/ecc/bw6-756/doc.go b/ecc/bw6-756/doc.go new file mode 100644 index 000000000..6b1eaf6dd --- /dev/null +++ b/ecc/bw6-756/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package bw6756 efficient elliptic curve and pairing implementation for bw6-756. +package bw6756 diff --git a/ecc/bw6-756/fp/arith.go b/ecc/bw6-756/fp/arith.go new file mode 100644 index 000000000..66fa66748 --- /dev/null +++ b/ecc/bw6-756/fp/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "math/bits" +) + +// madd0 hi = a*b + c (discards lo bits) +func madd0(a, b, c uint64) (hi uint64) { + var carry, lo uint64 + hi, lo = bits.Mul64(a, b) + _, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd1 hi, lo = a*b + c +func madd1(a, b, c uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd2 hi, lo = a*b + c + d +func madd2(a, b, c, d uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, e, carry) + return +} diff --git a/ecc/bw6-756/fp/asm.go b/ecc/bw6-756/fp/asm.go new file mode 100644 index 000000000..7344271eb --- /dev/null +++ b/ecc/bw6-756/fp/asm.go @@ -0,0 +1,24 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bw6-756/fp/asm_noadx.go b/ecc/bw6-756/fp/asm_noadx.go new file mode 100644 index 000000000..ae778bd3a --- /dev/null +++ b/ecc/bw6-756/fp/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bw6-756/fp/bw6_utils.go b/ecc/bw6-756/fp/bw6_utils.go new file mode 100644 index 000000000..58a2d0d10 --- /dev/null +++ b/ecc/bw6-756/fp/bw6_utils.go @@ -0,0 +1,27 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package fp + +// MulByNonResidue multiplies a fp.Element by 33 +func (z *Element) MulByNonResidue(x *Element) *Element { + var t Element + t.Double(x). + Double(&t). + Double(&t). + Double(&t). + Double(&t) + z.Add(&t, x) + return z +} diff --git a/ecc/bw6-756/fp/doc.go b/ecc/bw6-756/fp/doc.go new file mode 100644 index 000000000..033a5b2c8 --- /dev/null +++ b/ecc/bw6-756/fp/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fp contains field arithmetic operations for modulus = 0xf76adb...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. 
+// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [12]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f868400000000000000000001 // base 16 +// 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 // base 10 +package fp diff --git a/ecc/bw6-756/fp/element.go b/ecc/bw6-756/fp/element.go new file mode 100644 index 000000000..606e5f9dd --- /dev/null +++ b/ecc/bw6-756/fp/element.go @@ -0,0 +1,2722 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. 
In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 12 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 +type Element [12]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 12 + +// Bits number bits needed to represent Element +const Bits = 756 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 1 +const qElementWord1 uint64 = 3731203976813871104 +const qElementWord2 uint64 = 15039355238879481536 +const qElementWord3 uint64 = 4828608925799409630 +const qElementWord4 uint64 = 16326337093237622437 +const qElementWord5 uint64 = 756237273905161798 +const qElementWord6 uint64 = 16934317532427647658 +const qElementWord7 uint64 = 14755673041361585881 +const qElementWord8 uint64 = 18154628166362162086 +const qElementWord9 uint64 = 6671956210750770825 +const qElementWord10 uint64 = 16333450281447942351 +const qElementWord11 uint64 = 4352613195430282 + +var qElement = Element{ + qElementWord0, + 
qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + qElementWord6, + qElementWord7, + qElementWord8, + qElementWord9, + qElementWord10, + qElementWord11, +} + +// Used for Montgomery reduction. (qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 18446744073709551615 + +// rSquare +var rSquare = Element{ + 11214533042317621956, + 4418601975293183768, + 2233550636059863627, + 13772400071271951950, + 13010224617750716256, + 15582310590478290871, + 6301429202206019695, + 15624904615961126890, + 14411832617204527559, + 10495912060283172777, + 8432856701560321958, + 4166778949326216, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) 
+func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + z[4] = x[4] + z[5] = x[5] + z[6] = x[6] + z[7] = x[7] + z[8] = x[8] + z[9] = x[9] + z[10] = x[10] + z[11] = x[11] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) + } 
+} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + z[4] = 0 + z[5] = 0 + z[6] = 0 + z[7] = 0 + z[8] = 0 + z[9] = 0 + z[10] = 0 + z[11] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 18446744073709547378 + z[1] = 14463961505609547775 + z[2] = 15160016368967634470 + z[3] = 12241294279704278364 + z[4] = 2720419343484222500 + z[5] = 4799902015386277509 + z[6] = 8643488375494563078 + z[7] = 18366804658688562287 + z[8] = 2055362399696866477 + z[9] = 3108243834975866807 + z[10] = 9468215855567529777 + z[11] = 369351476012747 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 12 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[11] == x[11]) && (z[10] == x[10]) && (z[9] == x[9]) && (z[8] == x[8]) && (z[7] == x[7]) && (z[6] == x[6]) && (z[5] == x[5]) && (z[4] == x[4]) && (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[11] > _x[11] { + return 1 + } else if _z[11] < _x[11] { + return -1 + } + if _z[10] > _x[10] { + return 1 + } else if _z[10] < _x[10] { + return -1 + } + if _z[9] > _x[9] { + return 1 + } else if _z[9] < _x[9] { + return -1 + } + if _z[8] > _x[8] { + return 1 + } else if _z[8] < _x[8] { + return -1 + } + if _z[7] > _x[7] { + return 1 + } else if _z[7] < _x[7] { + return -1 + } + if _z[6] > _x[6] { + return 1 + } else if _z[6] < _x[6] { + return -1 + } + if _z[5] > _x[5] { + return 1 + } else if _z[5] < _x[5] { + return -1 + } + if _z[4] > _x[4] { + return 1 + } else if _z[4] < _x[4] { + return -1 + } + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 1, 0) + _, b = bits.Sub64(_z[1], 1865601988406935552, b) + _, b = bits.Sub64(_z[2], 7519677619439740768, b) + _, b = bits.Sub64(_z[3], 11637676499754480623, b) + _, b = bits.Sub64(_z[4], 8163168546618811218, b) + _, b = bits.Sub64(_z[5], 378118636952580899, b) + _, b = 
bits.Sub64(_z[6], 17690530803068599637, b) + _, b = bits.Sub64(_z[7], 7377836520680792940, b) + _, b = bits.Sub64(_z[8], 18300686120035856851, b) + _, b = bits.Sub64(_z[9], 12559350142230161220, b) + _, b = bits.Sub64(_z[10], 8166725140723971175, b) + _, b = bits.Sub64(_z[11], 2176306597715141, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [96]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[4] = binary.BigEndian.Uint64(bytes[32:40]) + z[5] = binary.BigEndian.Uint64(bytes[40:48]) + z[6] = binary.BigEndian.Uint64(bytes[48:56]) + z[7] = binary.BigEndian.Uint64(bytes[56:64]) + z[8] = binary.BigEndian.Uint64(bytes[64:72]) + z[9] = binary.BigEndian.Uint64(bytes[72:80]) + z[10] = binary.BigEndian.Uint64(bytes[80:88]) + z[11] = binary.BigEndian.Uint64(bytes[88:96]) + z[11] %= 4352613195430282 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 
3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } + + return z, nil +} + +// One returns 1 (in montgommery form) +func One() Element { + var one Element + one.SetOne() + return one +} + +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 + + // z = z + q + z[0], carry = bits.Add64(z[0], 1, 0) + z[1], carry = bits.Add64(z[1], 3731203976813871104, carry) + z[2], carry = bits.Add64(z[2], 15039355238879481536, carry) + z[3], carry = bits.Add64(z[3], 4828608925799409630, carry) + z[4], carry = bits.Add64(z[4], 16326337093237622437, carry) + z[5], carry = bits.Add64(z[5], 756237273905161798, carry) + z[6], carry = bits.Add64(z[6], 16934317532427647658, carry) + z[7], carry = bits.Add64(z[7], 14755673041361585881, carry) + z[8], carry = bits.Add64(z[8], 18154628166362162086, carry) + z[9], carry = bits.Add64(z[9], 6671956210750770825, carry) + z[10], carry = bits.Add64(z[10], 16333450281447942351, carry) + z[11], _ = bits.Add64(z[11], 4352613195430282, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] = z[3]>>1 | z[4]<<63 + z[4] = z[4]>>1 | z[5]<<63 + z[5] = z[5]>>1 | z[6]<<63 + z[6] = z[6]>>1 | z[7]<<63 + z[7] = z[7]>>1 | z[8]<<63 + z[8] = z[8]>>1 | z[9]<<63 + z[9] = z[9]>>1 | z[10]<<63 + z[10] = z[10]>>1 | z[11]<<63 + z[11] >>= 1 + +} + +// API with assembly impl + +// Mul z = x * y mod q +// see https://hackmd.io/@zkteam/modular_multiplication 
func (z *Element) Mul(x, y *Element) *Element {
	mul(z, x, y)
	return z
}

// Square z = x * x mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Square(x *Element) *Element {
	mul(z, x, x)
	return z
}

// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation
// sets and returns z = z * 1
func (z *Element) FromMont() *Element {
	fromMont(z)
	return z
}

// Add z = x + y mod q
func (z *Element) Add(x, y *Element) *Element {
	add(z, x, y)
	return z
}

// Double z = x + x mod q, aka Lsh 1
func (z *Element) Double(x *Element) *Element {
	double(z, x)
	return z
}

// Sub z = x - y mod q
func (z *Element) Sub(x, y *Element) *Element {
	sub(z, x, y)
	return z
}

// Neg z = q - x
func (z *Element) Neg(x *Element) *Element {
	neg(z, x)
	return z
}

// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms

// _mulGeneric sets z = x*y mod q using the CIOS (coarsely integrated operand
// scanning) Montgomery multiplication: each of the 12 rounds interleaves one
// word of the schoolbook product (madd1/madd2 on y) with one word of the
// Montgomery reduction (madd2/madd3 on the hard-coded limbs of q). The final
// conditional subtraction brings the result below q.
// see https://hackmd.io/@zkteam/modular_multiplication
func _mulGeneric(z, x, y *Element) {

	var t [12]uint64
	var c [3]uint64
	{
		// round 0
		v := x[0]
		c[1], c[0] = bits.Mul64(v, y[0])
		// m = c[0]*n'[0] mod W, with n'[0] = 18446744073709551615 = -q⁻¹ mod 2^64
		// (cf. the "m = z[0]n'[0] mod W" comments in _fromMontGeneric)
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd1(v, y[1], c[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd1(v, y[2], c[1])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd1(v, y[3], c[1])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd1(v, y[4], c[1])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd1(v, y[5], c[1])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd1(v, y[6], c[1])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd1(v, y[7], c[1])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd1(v, y[8], c[1])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd1(v, y[9], c[1])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd1(v, y[10], c[1])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd1(v, y[11], c[1])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 1
		v := x[1]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 2
		v := x[2]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 3
		v := x[3]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 4
		v := x[4]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 5
		v := x[5]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 6
		v := x[6]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 7
		v := x[7]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 8
		v := x[8]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 9
		v := x[9]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 10
		v := x[10]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}
	{
		// round 11: last round writes straight into z instead of t
		v := x[11]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 18446744073709551615
		c[2] = madd0(m, 1, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], z[0] = madd2(m, 3731203976813871104, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], z[1] = madd2(m, 15039355238879481536, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		c[2], z[2] = madd2(m, 4828608925799409630, c[2], c[0])
		c[1], c[0] = madd2(v, y[4], c[1], t[4])
		c[2], z[3] = madd2(m, 16326337093237622437, c[2], c[0])
		c[1], c[0] = madd2(v, y[5], c[1], t[5])
		c[2], z[4] = madd2(m, 756237273905161798, c[2], c[0])
		c[1], c[0] = madd2(v, y[6], c[1], t[6])
		c[2], z[5] = madd2(m, 16934317532427647658, c[2], c[0])
		c[1], c[0] = madd2(v, y[7], c[1], t[7])
		c[2], z[6] = madd2(m, 14755673041361585881, c[2], c[0])
		c[1], c[0] = madd2(v, y[8], c[1], t[8])
		c[2], z[7] = madd2(m, 18154628166362162086, c[2], c[0])
		c[1], c[0] = madd2(v, y[9], c[1], t[9])
		c[2], z[8] = madd2(m, 6671956210750770825, c[2], c[0])
		c[1], c[0] = madd2(v, y[10], c[1], t[10])
		c[2], z[9] = madd2(m, 16333450281447942351, c[2], c[0])
		c[1], c[0] = madd2(v, y[11], c[1], t[11])
		z[11], z[10] = madd3(m, 4352613195430282, c[0], c[2], c[1])
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 1, 0)
		z[1], b = bits.Sub64(z[1], 3731203976813871104, b)
		z[2], b = bits.Sub64(z[2], 15039355238879481536, b)
		z[3], b = bits.Sub64(z[3], 4828608925799409630, b)
		z[4], b = bits.Sub64(z[4], 16326337093237622437, b)
		z[5], b = bits.Sub64(z[5], 756237273905161798, b)
		z[6], b = bits.Sub64(z[6], 16934317532427647658, b)
		z[7], b = bits.Sub64(z[7], 14755673041361585881, b)
		z[8], b = bits.Sub64(z[8], 18154628166362162086, b)
		z[9], b = bits.Sub64(z[9], 6671956210750770825, b)
		z[10], b = bits.Sub64(z[10], 16333450281447942351, b)
		z[11], _ = bits.Sub64(z[11], 4352613195430282, b)
	}
}

// _mulWGeneric sets z = x*y mod q where y is a single machine word: round 0
// accumulates the word-by-limb product of x and y into t while rounds 1..11
// (see below) only perform the per-word Montgomery reduction.
func _mulWGeneric(z, x *Element, y uint64) {

	var t [12]uint64
	{
		// round 0
		c1, c0 := bits.Mul64(y, x[0])
		m := c0 * 18446744073709551615
		c2 := madd0(m, 1, c0)
		c1, c0 = madd1(y, x[1], c1)
		c2, t[0] = madd2(m, 3731203976813871104, c2, c0)
		c1, c0 = madd1(y, x[2], c1)
		c2, t[1] = madd2(m, 15039355238879481536, c2, c0)
		c1, c0 = madd1(y, x[3], c1)
		c2, t[2] = madd2(m, 4828608925799409630, c2, c0)
		c1, c0 = madd1(y, x[4], c1)
		c2, t[3] = madd2(m, 16326337093237622437, c2, c0)
		c1, c0 = madd1(y, x[5], c1)
		c2, t[4] = madd2(m, 756237273905161798, c2, c0)
		c1, c0 = madd1(y, x[6], c1)
		c2, t[5] = madd2(m, 16934317532427647658, c2, c0)
		c1, c0 = madd1(y, x[7], c1)
		c2, t[6] = madd2(m, 14755673041361585881, c2, c0)
		c1,
c0 = madd1(y, x[8], c1)
		c2, t[7] = madd2(m, 18154628166362162086, c2, c0)
		c1, c0 = madd1(y, x[9], c1)
		c2, t[8] = madd2(m, 6671956210750770825, c2, c0)
		c1, c0 = madd1(y, x[10], c1)
		c2, t[9] = madd2(m, 16333450281447942351, c2, c0)
		c1, c0 = madd1(y, x[11], c1)
		t[11], t[10] = madd3(m, 4352613195430282, c0, c2, c1)
	}
	// rounds 1..10: the multiplier word was fully consumed in round 0, so each
	// remaining round only folds one Montgomery reduction word (m * q) into t
	{
		// round 1
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 2
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 3
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 4
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 5
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 6
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 7
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 8
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 9
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 10
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, t[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, t[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, t[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, t[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, t[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, t[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, t[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, t[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, t[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, t[9] = madd2(m, 16333450281447942351, c2, t[10])
		t[11], t[10] = madd2(m, 4352613195430282, t[11], c2)
	}
	{
		// round 11: last reduction round writes straight into z
		m := t[0] * 18446744073709551615
		c2 := madd0(m, 1, t[0])
		c2, z[0] = madd2(m, 3731203976813871104, c2, t[1])
		c2, z[1] = madd2(m, 15039355238879481536, c2, t[2])
		c2, z[2] = madd2(m, 4828608925799409630, c2, t[3])
		c2, z[3] = madd2(m, 16326337093237622437, c2, t[4])
		c2, z[4] = madd2(m, 756237273905161798, c2, t[5])
		c2, z[5] = madd2(m, 16934317532427647658, c2, t[6])
		c2, z[6] = madd2(m, 14755673041361585881, c2, t[7])
		c2, z[7] = madd2(m, 18154628166362162086, c2, t[8])
		c2, z[8] = madd2(m, 6671956210750770825, c2, t[9])
		c2, z[9] = madd2(m, 16333450281447942351, c2, t[10])
		z[11], z[10] = madd2(m, 4352613195430282, t[11], c2)
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 1, 0)
		z[1], b = bits.Sub64(z[1], 3731203976813871104, b)
		z[2], b = bits.Sub64(z[2], 15039355238879481536, b)
		z[3], b = bits.Sub64(z[3], 4828608925799409630, b)
		z[4], b = bits.Sub64(z[4], 16326337093237622437, b)
		z[5], b = bits.Sub64(z[5], 756237273905161798, b)
		z[6], b = bits.Sub64(z[6], 16934317532427647658, b)
		z[7], b = bits.Sub64(z[7], 14755673041361585881, b)
		z[8], b = bits.Sub64(z[8], 18154628166362162086, b)
		z[9], b = bits.Sub64(z[9], 6671956210750770825, b)
		z[10], b = bits.Sub64(z[10], 16333450281447942351, b)
		z[11], _ = bits.Sub64(z[11], 4352613195430282, b)
	}
}

// _fromMontGeneric converts z out of Montgomery form via 12 reduction-only
// CIOS rounds (one per limb)
func _fromMontGeneric(z *Element) {
	// the following lines implement z = z * 1
	// with a modified CIOS Montgomery multiplication
	{
		// m = z[0]n'[0] mod W
		m := z[0] * 18446744073709551615
		C := madd0(m, 1, z[0])
		C, z[0] = madd2(m, 3731203976813871104, z[1], C)
		C, z[1] = madd2(m, 15039355238879481536, z[2], C)
		C, z[2] = madd2(m, 4828608925799409630, z[3], C)
		C, z[3] = madd2(m, 16326337093237622437, z[4], C)
		C, z[4] = madd2(m, 756237273905161798, z[5], C)
		C, z[5] = madd2(m, 16934317532427647658, z[6], C)
		C, z[6] = madd2(m, 14755673041361585881, z[7], C)
		C, z[7] = madd2(m, 18154628166362162086, z[8], C)
		C, z[8] = madd2(m, 6671956210750770825, z[9], C)
		C, z[9] = madd2(m, 16333450281447942351, z[10], C)
		C, z[10] = madd2(m, 4352613195430282,
z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 
4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, 
z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, 
z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && 
(z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 1, 0)
		z[1], b = bits.Sub64(z[1], 3731203976813871104, b)
		z[2], b = bits.Sub64(z[2], 15039355238879481536, b)
		z[3], b = bits.Sub64(z[3], 4828608925799409630, b)
		z[4], b = bits.Sub64(z[4], 16326337093237622437, b)
		z[5], b = bits.Sub64(z[5], 756237273905161798, b)
		z[6], b = bits.Sub64(z[6], 16934317532427647658, b)
		z[7], b = bits.Sub64(z[7], 14755673041361585881, b)
		z[8], b = bits.Sub64(z[8], 18154628166362162086, b)
		z[9], b = bits.Sub64(z[9], 6671956210750770825, b)
		z[10], b = bits.Sub64(z[10], 16333450281447942351, b)
		z[11], _ = bits.Sub64(z[11], 4352613195430282, b)
	}
}

// _addGeneric sets z = x + y mod q.
// Pure-Go fallback for the assembly implementation; NOT constant time.
func _addGeneric(z, x, y *Element) {
	var carry uint64

	z[0], carry = bits.Add64(x[0], y[0], 0)
	z[1], carry = bits.Add64(x[1], y[1], carry)
	z[2], carry = bits.Add64(x[2], y[2], carry)
	z[3], carry = bits.Add64(x[3], y[3], carry)
	z[4], carry = bits.Add64(x[4], y[4], carry)
	z[5], carry = bits.Add64(x[5], y[5], carry)
	z[6], carry = bits.Add64(x[6], y[6], carry)
	z[7], carry = bits.Add64(x[7], y[7], carry)
	z[8], carry = bits.Add64(x[8], y[8], carry)
	z[9], carry = bits.Add64(x[9], y[9], carry)
	z[10], carry = bits.Add64(x[10], y[10], carry)
	z[11], _ = bits.Add64(x[11], y[11], carry)
	// assumes x, y < q: the top limb of q uses well under 64 bits, so the
	// sum fits in 12 limbs and the dropped carry is always 0 — TODO confirm
	// inputs are always reduced at this call site.

	// conditional z -= q (same check/subtract as _reduceGeneric)
	_reduceGeneric(z)
}

// _doubleGeneric sets z = 2x mod q.
// Pure-Go fallback for the assembly implementation; NOT constant time.
func _doubleGeneric(z, x *Element) {
	var carry uint64

	z[0], carry = bits.Add64(x[0], x[0], 0)
	z[1], carry = bits.Add64(x[1], x[1], carry)
	z[2], carry = bits.Add64(x[2], x[2], carry)
	z[3], carry = bits.Add64(x[3], x[3], carry)
	z[4], carry = bits.Add64(x[4], x[4], carry)
	z[5], carry = bits.Add64(x[5], x[5], carry)
	z[6], carry = bits.Add64(x[6], x[6], carry)
	z[7], carry = bits.Add64(x[7], x[7], carry)
	z[8], carry = bits.Add64(x[8], x[8], carry)
	z[9], carry = bits.Add64(x[9], x[9], carry)
	z[10], carry = bits.Add64(x[10], x[10], carry)
	z[11], _ = bits.Add64(x[11], x[11], carry)

	// conditional z -= q (same check/subtract as _reduceGeneric)
	_reduceGeneric(z)
}

// _subGeneric sets z = x - y mod q.
// Pure-Go fallback for the assembly implementation.
func _subGeneric(z, x, y *Element) {
	var b uint64
	z[0], b = bits.Sub64(x[0], y[0], 0)
	z[1], b = bits.Sub64(x[1], y[1], b)
	z[2], b = bits.Sub64(x[2], y[2], b)
	z[3], b = bits.Sub64(x[3], y[3], b)
	z[4], b = bits.Sub64(x[4], y[4], b)
	z[5], b = bits.Sub64(x[5], y[5], b)
	z[6], b = bits.Sub64(x[6], y[6], b)
	z[7], b = bits.Sub64(x[7], y[7], b)
	z[8], b = bits.Sub64(x[8], y[8], b)
	z[9], b = bits.Sub64(x[9], y[9], b)
	z[10], b = bits.Sub64(x[10], y[10], b)
	z[11], b = bits.Sub64(x[11], y[11], b)
	if b != 0 {
		// borrow out: the difference went negative, add q back
		var c uint64
		z[0], c = bits.Add64(z[0], 1, 0)
		z[1], c = bits.Add64(z[1], 3731203976813871104, c)
		z[2], c = bits.Add64(z[2], 15039355238879481536, c)
		z[3], c = bits.Add64(z[3], 4828608925799409630, c)
		z[4], c = bits.Add64(z[4], 16326337093237622437, c)
		z[5], c = bits.Add64(z[5], 756237273905161798, c)
		z[6], c = bits.Add64(z[6], 16934317532427647658, c)
		z[7], c = bits.Add64(z[7], 14755673041361585881, c)
		z[8], c = bits.Add64(z[8], 18154628166362162086, c)
		z[9], c = bits.Add64(z[9], 6671956210750770825, c)
		z[10], c = bits.Add64(z[10], 16333450281447942351, c)
		z[11], _ = bits.Add64(z[11], 4352613195430282, c)
	}
}

// _negGeneric sets z = -x mod q (i.e. q - x for non-zero x).
// Pure-Go fallback for the assembly implementation.
func _negGeneric(z, x *Element) {
	if x.IsZero() {
		z.SetZero()
		return
	}
	var borrow uint64
	z[0], borrow = bits.Sub64(1, x[0], 0)
	z[1], borrow = bits.Sub64(3731203976813871104, x[1], borrow)
	z[2], borrow = bits.Sub64(15039355238879481536, x[2], borrow)
	z[3], borrow = bits.Sub64(4828608925799409630, x[3], borrow)
	z[4], borrow = bits.Sub64(16326337093237622437, x[4], borrow)
	z[5], borrow = bits.Sub64(756237273905161798, x[5], borrow)
	z[6], borrow = bits.Sub64(16934317532427647658, x[6], borrow)
	z[7], borrow = bits.Sub64(14755673041361585881, x[7], borrow)
	z[8], borrow = bits.Sub64(18154628166362162086, x[8], borrow)
	z[9], borrow = bits.Sub64(6671956210750770825, x[9], borrow)
	z[10], borrow = bits.Sub64(16333450281447942351, x[10], borrow)
	z[11], _ = bits.Sub64(4352613195430282, x[11], borrow)
}

// _reduceGeneric subtracts q from z once if z ≥ q.
// note: this is NOT constant time
func _reduceGeneric(z *Element) {
	// if z ≥ q → z -= q
	if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 1, 0)
		z[1], b = bits.Sub64(z[1], 3731203976813871104, b)
		z[2], b = bits.Sub64(z[2], 15039355238879481536, b)
		z[3], b = bits.Sub64(z[3], 4828608925799409630, b)
		z[4], b = bits.Sub64(z[4], 16326337093237622437, b)
		z[5], b = bits.Sub64(z[5], 756237273905161798, b)
		z[6], b = bits.Sub64(z[6], 16934317532427647658, b)
		z[7], b = bits.Sub64(z[7], 14755673041361585881, b)
		z[8], b = bits.Sub64(z[8], 18154628166362162086, b)
		z[9], b = bits.Sub64(z[9], 6671956210750770825, b)
		z[10], b = bits.Sub64(z[10], 16333450281447942351, b)
		z[11], _ = bits.Sub64(z[11], 4352613195430282, b)
	}
}

// mulByConstant multiplies z in place by a small unsigned constant,
// using addition chains for the common small factors.
func mulByConstant(z *Element, c uint8) {
	switch c {
	case 0:
		z.SetZero()
		return
	case 1:
		return
	case 2:
		z.Double(z)
		return
	case 3:
		t := *z
		z.Double(z).Add(z, &t) // 3z = 2z + z
	case 5:
		t := *z
		z.Double(z).Double(z).Add(z, &t) // 5z = 4z + z
	default:
		var y Element
		y.SetUint64(uint64(c))
		z.Mul(z, &y)
	}
}

// BatchInvert returns a new slice with every element inverted.
// Uses the Montgomery batch inversion trick: one field inversion plus
// 3(n-1) multiplications. Zero inputs map to zero outputs.
func BatchInvert(a []Element) []Element {
	res := make([]Element, len(a))
	if len(a) == 0 {
		return res
	}

	isZero := make([]bool, len(a))
	acc := One()

	// forward pass: res[i] = prod of a[0..i-1] (skipping zeroes)
	for i := range a {
		if a[i].IsZero() {
			isZero[i] = true
			continue
		}
		res[i] = acc
		acc.Mul(&acc, &a[i])
	}

	// single inversion of the running product
	acc.Inverse(&acc)

	// backward pass: peel off one factor per element
	for i := len(a) - 1; i >= 0; i-- {
		if isZero[i] {
			continue
		}
		res[i].Mul(&res[i], &acc)
		acc.Mul(&acc, &a[i])
	}

	return res
}

// _butterflyGeneric sets (a, b) = (a+b, a-b) mod q.
func _butterflyGeneric(a, b *Element) {
	old := *a
	a.Add(a, b)
	b.Sub(&old, b)
}

// BitLen returns the minimum number of bits needed to represent z;
// returns 0 if z == 0.
func (z *Element) BitLen() int {
	for i := 11; i > 0; i-- {
		if z[i] != 0 {
			return i*64 + bits.Len64(z[i])
		}
	}
	return bits.Len64(z[0])
}

// Exp sets z = x^exponent mod q and returns z, using plain
// square-and-multiply over the exponent's bits.
func (z *Element) Exp(x Element, exponent *big.Int) *Element {
	if exponent.Sign() == 0 {
		// x^0 == 1
		return z.SetOne()
	}

	z.Set(&x)

	for i := exponent.BitLen() - 2; i >= 0; i-- {
		z.Square(z)
		if exponent.Bit(i) == 1 {
			z.Mul(z, &x)
		}
	}

	return z
}

// ToMont converts z to Montgomery form;
// sets and returns z = z * r²
func (z *Element) ToMont() *Element {
	return z.Mul(z, &rSquare)
}

// ToRegular returns z in regular (non-Montgomery) form; z itself is not mutated.
func (z Element) ToRegular() Element {
	return *z.FromMont()
}

// String returns the decimal representation of z as generated by
// z.Text(10).
func (z *Element) String() string {
	return z.Text(10)
}

// Text returns the string representation of z in the given base.
// Base must be between 2 and 36, inclusive. The result uses the
// lower-case letters 'a' to 'z' for digit values 10 to 35.
// No prefix (such as "0x") is added to the string. If z is a nil
// pointer it returns "".
// If base == 10 and -z fits in a uint64 prefix "-" is added to the string.
+func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[88:96], z[0]) + binary.BigEndian.PutUint64(b[80:88], z[1]) + binary.BigEndian.PutUint64(b[72:80], z[2]) + binary.BigEndian.PutUint64(b[64:72], z[3]) + binary.BigEndian.PutUint64(b[56:64], z[4]) + binary.BigEndian.PutUint64(b[48:56], z[5]) + binary.BigEndian.PutUint64(b[40:48], z[6]) + binary.BigEndian.PutUint64(b[32:40], z[7]) + binary.BigEndian.PutUint64(b[24:32], z[8]) + binary.BigEndian.PutUint64(b[16:24], z[9]) + binary.BigEndian.PutUint64(b[8:16], z[10]) + binary.BigEndian.PutUint64(b[0:8], z[11]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. 
+func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[88:96], _z[0]) + binary.BigEndian.PutUint64(res[80:88], _z[1]) + binary.BigEndian.PutUint64(res[72:80], _z[2]) + binary.BigEndian.PutUint64(res[64:72], _z[3]) + binary.BigEndian.PutUint64(res[56:64], _z[4]) + binary.BigEndian.PutUint64(res[48:56], _z[5]) + binary.BigEndian.PutUint64(res[40:48], _z[6]) + binary.BigEndian.PutUint64(res[32:40], _z[7]) + binary.BigEndian.PutUint64(res[24:32], _z[8]) + binary.BigEndian.PutUint64(res[16:24], _z[9]) + binary.BigEndian.PutUint64(res[8:16], _z[10]) + binary.BigEndian.PutUint64(res[0:8], _z[11]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. +func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = 
uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil +} + +// Legendre returns the Legendre symbol of z (either +1, -1, or 0.) +func (z *Element) Legendre() int { + var l Element + // z^((q-1)/2) + l.expByLegendreExp(*z) + + if l.IsZero() { + return 0 + } + + // if l == 1 + if (l[11] == 369351476012747) && (l[10] == 9468215855567529777) && (l[9] == 3108243834975866807) && (l[8] == 2055362399696866477) && (l[7] == 18366804658688562287) && (l[6] == 8643488375494563078) && (l[5] == 4799902015386277509) && (l[4] == 2720419343484222500) && (l[3] == 12241294279704278364) && (l[2] == 15160016368967634470) && (l[1] == 14463961505609547775) && (l[0] == 18446744073709547378) { + return 1 + } + return -1 +} + +// Sqrt z = √x mod q +// if the square root doesn't exist (x is not a square mod q) +// Sqrt leaves z unchanged and returns nil +func (z *Element) Sqrt(x *Element) *Element { + // q ≡ 1 (mod 4) + // see modSqrtTonelliShanks in math/big/int.go + // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + + var y, b, t, w Element + // w = x^((s-1)/2)) + w.expBySqrtExp(*x) + + // y = x^((s+1)/2)) = w * x + y.Mul(x, &w) + + // b = x^s = w * w * x = y * x + b.Mul(&w, &y) + + // g = nonResidue ^ s + var g = Element{ + 17302715199413996045, + 15077845457253267709, + 8842885729139027579, + 12189878420705505575, + 12380986790262239346, + 585111498723936856, + 
4947215576903759546, + 1186632482028566920, + 14543050817583235372, + 5644943604719368358, + 9440830989708189862, + 1039766423535362, + } + r := uint64(82) + + // compute legendre symbol + // t = x^((q-1)/2) = r-1 squaring of x^s + t = b + for i := uint64(0); i < r-1; i++ { + t.Square(&t) + } + if t.IsZero() { + return z.SetZero() + } + if !((t[11] == 369351476012747) && (t[10] == 9468215855567529777) && (t[9] == 3108243834975866807) && (t[8] == 2055362399696866477) && (t[7] == 18366804658688562287) && (t[6] == 8643488375494563078) && (t[5] == 4799902015386277509) && (t[4] == 2720419343484222500) && (t[3] == 12241294279704278364) && (t[2] == 15160016368967634470) && (t[1] == 14463961505609547775) && (t[0] == 18446744073709547378)) { + // t != 1, we don't have a square root + return nil + } + for { + var m uint64 + t = b + + // for t != 1 + for !((t[11] == 369351476012747) && (t[10] == 9468215855567529777) && (t[9] == 3108243834975866807) && (t[8] == 2055362399696866477) && (t[7] == 18366804658688562287) && (t[6] == 8643488375494563078) && (t[5] == 4799902015386277509) && (t[4] == 2720419343484222500) && (t[3] == 12241294279704278364) && (t[2] == 15160016368967634470) && (t[1] == 14463961505609547775) && (t[0] == 18446744073709547378)) { + t.Square(&t) + m++ + } + + if m == 0 { + return z.Set(&y) + } + // t = g^(2^(r-m-1)) mod q + ge := int(r - m - 1) + t = g + for ge > 0 { + t.Square(&t) + ge-- + } + + g.Square(&t) + y.Mul(&y, &t) + b.Mul(&b, &g) + r = m + } +} + +func max(a int, b int) int { + if a > b { + return a + } + return b +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := 
c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 16061512306393401370 +const inversionCorrectionFactorWord1 = 12469388396993975658 +const inversionCorrectionFactorWord2 = 12941199289357671440 +const inversionCorrectionFactorWord3 = 7124172912896157387 +const inversionCorrectionFactorWord4 = 7772575019676086033 +const inversionCorrectionFactorWord5 = 5410978411075096125 +const inversionCorrectionFactorWord6 = 15135850590536056079 +const inversionCorrectionFactorWord7 = 14366933837510102702 +const inversionCorrectionFactorWord8 = 17864238268145908760 +const inversionCorrectionFactorWord9 = 11845167622525040086 +const inversionCorrectionFactorWord10 = 12428223085045138512 +const inversionCorrectionFactorWord11 = 2992926161591192 + +const invIterationsN = 50 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x *Element) *Element { + if x.IsZero() { + z.SetZero() + return z + } + + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + qElementWord6, + qElementWord7, + qElementWord8, + qElementWord9, + qElementWord10, + qElementWord11, + } // b := q + + u := Element{1} + + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 + + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 
0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still + } + + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) + a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN) + a[5] = (a[5] >> approxLowBitsN) | ((a[6]) << approxHighBitsN) + a[6] = (a[6] >> approxLowBitsN) | ((a[7]) << approxHighBitsN) + a[7] = (a[7] >> approxLowBitsN) | ((a[8]) << approxHighBitsN) + a[8] = (a[8] >> approxLowBitsN) | ((a[9]) << approxHighBitsN) + a[9] = (a[9] >> approxLowBitsN) | ((a[10]) << approxHighBitsN) + a[10] = (a[10] >> approxLowBitsN) | ((a[11]) << approxHighBitsN) + a[11] = (a[11] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) 
| ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) + b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) + b[5] = (b[5] >> approxLowBitsN) | ((b[6]) << approxHighBitsN) + b[6] = (b[6] >> approxLowBitsN) | ((b[7]) << approxHighBitsN) + b[7] = (b[7] >> approxLowBitsN) | ((b[8]) << approxHighBitsN) + b[8] = (b[8] >> approxLowBitsN) | ((b[9]) << approxHighBitsN) + b[9] = (b[9] >> approxLowBitsN) | ((b[10]) << approxHighBitsN) + b[10] = (b[10] >> approxLowBitsN) | ((b[11]) << approxHighBitsN) + b[11] = (b[11] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) + + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + inversionCorrectionFactorWord6, + inversionCorrectionFactorWord7, + 
inversionCorrectionFactorWord8, + inversionCorrectionFactorWord9, + inversionCorrectionFactorWord10, + inversionCorrectionFactorWord11, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. 
// Last bit of xHi may be used as a sign bit.
//
// montReduceSigned performs a Montgomery reduction (SOS layout) of the
// 13-word quantity xHi·2⁷⁶⁸ + x into z. The top bit of xHi (signBitSelector)
// flags a negative input; the correction for it happens at the end.
func (z *Element) montReduceSigned(x *Element, xHi uint64) {

	const signBitRemover = ^signBitSelector
	neg := xHi&signBitSelector != 0
	// the SOS implementation requires that most significant bit is 0
	// Let X be xHi*r + x
	// note that if X is negative we would have initially stored it as 2⁶⁴ r + X
	xHi &= signBitRemover
	// with this a negative X is now represented as 2⁶³ r + X

	var t [2*Limbs - 1]uint64
	var C uint64

	// first reduction round operates directly on x; m is chosen so that
	// x + m*q ≡ 0 (mod 2⁶⁴), clearing the lowest word
	m := x[0] * qInvNegLsw

	C = madd0(m, qElementWord0, x[0])
	C, t[1] = madd2(m, qElementWord1, x[1], C)
	C, t[2] = madd2(m, qElementWord2, x[2], C)
	C, t[3] = madd2(m, qElementWord3, x[3], C)
	C, t[4] = madd2(m, qElementWord4, x[4], C)
	C, t[5] = madd2(m, qElementWord5, x[5], C)
	C, t[6] = madd2(m, qElementWord6, x[6], C)
	C, t[7] = madd2(m, qElementWord7, x[7], C)
	C, t[8] = madd2(m, qElementWord8, x[8], C)
	C, t[9] = madd2(m, qElementWord9, x[9], C)
	C, t[10] = madd2(m, qElementWord10, x[10], C)
	C, t[11] = madd2(m, qElementWord11, x[11], C)

	// the high word of m * qElement[11] is at most 62 bits
	// x[11] + C is at most 65 bits (high word at most 1 bit)
	// Thus the resulting C will be at most 63 bits
	t[12] = xHi + C
	// xHi and C are 63 bits, therefore no overflow

	// rounds 1..10: same clearing step, shifted one word up each time
	{
		const i = 1
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 2
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 3
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 4
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 5
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 6
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 7
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 8
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 9
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	{
		const i = 10
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)
		C, t[i+6] = madd2(m, qElementWord6, t[i+6], C)
		C, t[i+7] = madd2(m, qElementWord7, t[i+7], C)
		C, t[i+8] = madd2(m, qElementWord8, t[i+8], C)
		C, t[i+9] = madd2(m, qElementWord9, t[i+9], C)
		C, t[i+10] = madd2(m, qElementWord10, t[i+10], C)
		C, t[i+11] = madd2(m, qElementWord11, t[i+11], C)

		t[i+Limbs] += C
	}
	// final round writes straight into z instead of t
	{
		const i = 11
		m := t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, z[0] = madd2(m, qElementWord1, t[i+1], C)
		C, z[1] = madd2(m, qElementWord2, t[i+2], C)
		C, z[2] = madd2(m, qElementWord3, t[i+3], C)
		C, z[3] = madd2(m, qElementWord4, t[i+4], C)
		C, z[4] = madd2(m, qElementWord5, t[i+5], C)
		C, z[5] = madd2(m, qElementWord6, t[i+6], C)
		C, z[6] = madd2(m, qElementWord7, t[i+7], C)
		C, z[7] = madd2(m, qElementWord8, t[i+8], C)
		C, z[8] = madd2(m, qElementWord9, t[i+9], C)
		C, z[9] = madd2(m, qElementWord10, t[i+10], C)
		z[11], z[10] = madd2(m, qElementWord11, t[i+11], C)
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	// (the literals below are the limbs of q, least significant last in the
	// comparison nesting, matching the qElementWordN subtrahends)
	if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 1, 0)
		z[1], b = bits.Sub64(z[1], 3731203976813871104, b)
		z[2], b = bits.Sub64(z[2], 15039355238879481536, b)
		z[3], b = bits.Sub64(z[3], 4828608925799409630, b)
		z[4], b = bits.Sub64(z[4], 16326337093237622437, b)
		z[5], b = bits.Sub64(z[5], 756237273905161798, b)
		z[6], b = bits.Sub64(z[6], 16934317532427647658, b)
		z[7], b = bits.Sub64(z[7], 14755673041361585881, b)
		z[8], b = bits.Sub64(z[8], 18154628166362162086, b)
		z[9], b = bits.Sub64(z[9], 6671956210750770825, b)
		z[10], b = bits.Sub64(z[10], 16333450281447942351, b)
		z[11], _ = bits.Sub64(z[11], 4352613195430282, b)
	}
	if neg {
		// We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead
		// so subtract the spurious 2⁶³ term
		var b uint64
		z[0], b = bits.Sub64(z[0], signBitSelector, 0)
		z[1], b = bits.Sub64(z[1], 0, b)
		z[2], b = bits.Sub64(z[2], 0, b)
		z[3], b = bits.Sub64(z[3], 0, b)
		z[4], b = bits.Sub64(z[4], 0, b)
		z[5], b = bits.Sub64(z[5], 0, b)
		z[6], b = bits.Sub64(z[6], 0, b)
		z[7], b = bits.Sub64(z[7], 0, b)
		z[8], b = bits.Sub64(z[8], 0, b)
		z[9], b = bits.Sub64(z[9], 0, b)
		z[10], b = bits.Sub64(z[10], 0, b)
		z[11], b = bits.Sub64(z[11], 0, b)

		// Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0
		if b != 0 {
			// z[11] = -1
			// negative: add q
			const neg1 = 0xFFFFFFFFFFFFFFFF

			b = 0
			z[0], b = bits.Add64(z[0], qElementWord0, b)
			z[1], b = bits.Add64(z[1], qElementWord1, b)
			z[2], b = bits.Add64(z[2], qElementWord2, b)
			z[3], b = bits.Add64(z[3], qElementWord3, b)
			z[4], b = bits.Add64(z[4], qElementWord4, b)
			z[5], b = bits.Add64(z[5], qElementWord5, b)
			z[6], b = bits.Add64(z[6], qElementWord6, b)
			z[7], b = bits.Add64(z[7], qElementWord7, b)
			z[8], b = bits.Add64(z[8], qElementWord8, b)
			z[9], b = bits.Add64(z[9], qElementWord9, b)
			z[10], b = bits.Add64(z[10], qElementWord10, b)
			z[11], _ = bits.Add64(neg1, qElementWord11, b)
		}
	}
}

// mulWSigned mul word signed (w/ montgomery reduction)
func (z *Element) mulWSigned(x *Element, y int64) {
	// branchless absolute value: m is -1 if y < 0, else 0
	m := y >> 63
	_mulWGeneric(z, x, uint64((y^m)-m))
	// multiply by abs(y)
	if y < 0 {
		z.Neg(z)
	}
}

// neg sets z to the two's-complement negation of the 13-word value
// xHi·2⁷⁶⁸ + x (no modular reduction) and returns the negated high word.
func (z *Element) neg(x *Element, xHi uint64) uint64 {
	var b uint64

	z[0], b = bits.Sub64(0, x[0], 0)
	z[1], b = bits.Sub64(0, x[1], b)
	z[2], b = bits.Sub64(0, x[2], b)
	z[3], b = bits.Sub64(0, x[3], b)
	z[4], b = bits.Sub64(0, x[4], b)
	z[5], b = bits.Sub64(0, x[5], b)
	z[6], b = bits.Sub64(0, x[6], b)
	z[7], b = bits.Sub64(0, x[7], b)
	z[8], b = bits.Sub64(0, x[8], b)
	z[9], b = bits.Sub64(0, x[9], b)
	z[10], b = bits.Sub64(0, x[10], b)
	z[11], b = bits.Sub64(0, x[11], b)
	xHi, _ = bits.Sub64(0, xHi, b)

	return xHi
}

// regular multiplication by one word regular (non montgomery)
// Fewer
// additions than the branch-free for positive y. Could be faster on some architectures.
//
// mulWRegular sets z = x * |y| as a plain (non-Montgomery) 13-word product,
// negates the whole 13-word result when y < 0, and returns the high word.
func (z *Element) mulWRegular(x *Element, y int64) uint64 {

	// w := abs(y)
	m := y >> 63
	w := uint64((y ^ m) - m)

	var c uint64
	c, z[0] = bits.Mul64(x[0], w)
	c, z[1] = madd1(x[1], w, c)
	c, z[2] = madd1(x[2], w, c)
	c, z[3] = madd1(x[3], w, c)
	c, z[4] = madd1(x[4], w, c)
	c, z[5] = madd1(x[5], w, c)
	c, z[6] = madd1(x[6], w, c)
	c, z[7] = madd1(x[7], w, c)
	c, z[8] = madd1(x[8], w, c)
	c, z[9] = madd1(x[9], w, c)
	c, z[10] = madd1(x[10], w, c)
	c, z[11] = madd1(x[11], w, c)

	if y < 0 {
		// restore the sign: negate (z, c) as one 13-word quantity
		c = z.neg(z, c)
	}

	return c
}

/*
Removed: seems slower
// mulWRegular branch-free regular multiplication by one word (non montgomery)
func (z *Element) mulWRegularBf(x *Element, y int64) uint64 {

	w := uint64(y)
	allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w

	// s[0], s[1] so results are not stored immediately in z.
	// x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z
	var s [2]uint64
	var h [2]uint64

	h[0], s[0] = bits.Mul64(x[0], w)

	c := uint64(0)
	b := uint64(0)

	{
		const curI = 1 % 2
		const prevI = 1 - curI
		const iMinusOne = 1 - 1

		h[curI], s[curI] = bits.Mul64(x[1], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 2 % 2
		const prevI = 1 - curI
		const iMinusOne = 2 - 1

		h[curI], s[curI] = bits.Mul64(x[2], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 3 % 2
		const prevI = 1 - curI
		const iMinusOne = 3 - 1

		h[curI], s[curI] = bits.Mul64(x[3], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 4 % 2
		const prevI = 1 - curI
		const iMinusOne = 4 - 1

		h[curI], s[curI] = bits.Mul64(x[4], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 5 % 2
		const prevI = 1 - curI
		const iMinusOne = 5 - 1

		h[curI], s[curI] = bits.Mul64(x[5], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 6 % 2
		const prevI = 1 - curI
		const iMinusOne = 6 - 1

		h[curI], s[curI] = bits.Mul64(x[6], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 7 % 2
		const prevI = 1 - curI
		const iMinusOne = 7 - 1

		h[curI], s[curI] = bits.Mul64(x[7], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 8 % 2
		const prevI = 1 - curI
		const iMinusOne = 8 - 1

		h[curI], s[curI] = bits.Mul64(x[8], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 9 % 2
		const prevI = 1 - curI
		const iMinusOne = 9 - 1

		h[curI], s[curI] = bits.Mul64(x[9], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 10 % 2
		const prevI = 1 - curI
		const iMinusOne = 10 - 1

		h[curI], s[curI] = bits.Mul64(x[10], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 11 % 2
		const prevI = 1 - curI
		const iMinusOne = 11 - 1

		h[curI], s[curI] = bits.Mul64(x[11], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}
	{
		const curI = 12 % 2
		const prevI = 1 - curI
		const iMinusOne = 11

		s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]

		return s[curI] + c
	}
}*/

// Requires NoCarry
//
// linearCombNonModular sets z = x*xC + y*yC without modular reduction and
// returns the resulting high (13th) word.
func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 {
	var yTimes Element

	yHi := yTimes.mulWRegular(y, yC)
	xHi := z.mulWRegular(x, xC)

	carry := uint64(0)
	z[0], carry = bits.Add64(z[0], yTimes[0], carry)
	z[1], carry = bits.Add64(z[1], yTimes[1], carry)
	z[2], carry = bits.Add64(z[2], yTimes[2], carry)
	z[3], carry = bits.Add64(z[3], yTimes[3], carry)
	z[4], carry = bits.Add64(z[4], yTimes[4], carry)
	z[5], carry = bits.Add64(z[5], yTimes[5], carry)
	z[6], carry = bits.Add64(z[6], yTimes[6], carry)
	z[7], carry = bits.Add64(z[7], yTimes[7], carry)
	z[8], carry = bits.Add64(z[8], yTimes[8], carry)
	z[9], carry = bits.Add64(z[9], yTimes[9], carry)
	z[10], carry = bits.Add64(z[10], yTimes[10], carry)
	z[11], carry = bits.Add64(z[11], yTimes[11], carry)

	yHi, _ = bits.Add64(xHi, yHi, carry)

	return yHi
}
diff --git a/ecc/bw6-756/fp/element_exp.go b/ecc/bw6-756/fp/element_exp.go
new file mode 100644
index 000000000..af741d643
--- /dev/null
+++ b/ecc/bw6-756/fp/element_exp.go
@@ -0,0 +1,1993 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// expBySqrtExp is equivalent to z.Exp(x, 1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _100 = 1 + _11 + // _101 = 1 + _100 + // _1001 = _100 + _101 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _10001 = _10 + _1111 + // _10101 = _100 + _10001 + // _10111 = _10 + _10101 + // _11001 = _10 + _10111 + // _11011 = _10 + _11001 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _100001 = _10 + _11111 + // _100011 = _10 + _100001 + // _100101 = _10 + _100011 + // _100111 = _10 + _100101 + // _101001 = _10 + _100111 + // _101011 = _10 + _101001 + // _101101 = _10 + _101011 + // _101111 = _10 + _101101 + // _110001 = _10 + _101111 + // _110011 = _10 + _110001 + // _110101 = _10 + _110011 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111010 = _111011 + _111111 + // i52 = ((_1111010 << 4 + _11011) << 7 + _101011) << 7 + // i67 = ((_110111 + i52) << 7 + _110101) << 5 + _10111 + // i87 = ((i67 << 7 + _111001) << 5 + _10001) << 6 + // i101 = ((_10111 + i87) << 8 + _10101) << 3 + _11 + // i128 = ((i101 << 9 + _1001) << 8 + _111111) << 8 + // i145 = ((_1111 + i128) << 9 + _110101) << 5 + _1101 + // i167 = ((i145 << 9 + _110011) << 6 + _110101) << 5 + // i187 = ((_11001 + i167) << 8 + _101111) << 9 + _110011 + // i205 = ((i187 << 7 + _100101) << 6 + _111101) << 3 + // i223 = ((_11 + i205) << 8 + _1011) << 7 + _11101 + // i244 = ((i223 << 9 + _100111) << 6 + _111011) << 4 + // i262 = ((_1111 + i244) << 8 + _100011) << 7 + _10001 + // 
i285 = ((i262 << 7 + _101) << 8 + _10101) << 6 + // i299 = ((_10001 + i285) << 7 + _110001) << 4 + _1101 + // i325 = ((i299 << 7 + _11011) << 8 + _110011) << 9 + // i341 = ((_110101 + i325) << 7 + _111001) << 6 + _110011 + // i366 = ((i341 << 6 + _110001) << 9 + _10101) << 8 + // i383 = ((_100011 + i366) << 6 + _11011) << 8 + _111101 + // i401 = ((i383 << 3 + _11) << 10 + _1011) << 3 + // i422 = ((1 + i401) << 12 + _100101) << 6 + _110101 + // i448 = ((i422 << 12 + _100111) << 6 + _110101) << 6 + // i467 = ((_10101 + i448) << 11 + _101001) << 5 + _11111 + // i490 = ((i467 << 5 + _1011) << 9 + _111001) << 7 + // i508 = ((_110011 + i490) << 4 + _1101) << 11 + _110111 + // i535 = ((i508 << 7 + _11001) << 9 + _110111) << 9 + // i550 = ((_101001 + i535) << 6 + _1011) << 6 + _1101 + // i572 = ((i550 << 9 + _101011) << 5 + _11011) << 6 + // i590 = ((_11011 + i572) << 6 + _11001) << 9 + _110101 + // i616 = ((i590 << 7 + _10101) << 6 + _11) << 11 + // i630 = ((_10101 + i616) << 4 + _101) << 7 + _1111 + // i653 = ((i630 << 10 + _100101) << 6 + _100011) << 5 + // i670 = ((_1111 + i653) << 7 + _11111) << 7 + _111101 + // i688 = ((i670 << 3 + _101) << 10 + _101101) << 3 + // i708 = ((_101 + i688) << 10 + _101111) << 7 + _100001 + // i731 = ((i708 << 3 + _101) << 10 + _101001) << 8 + // i751 = ((_100111 + i731) << 3 + _11) << 14 + _110011 + // i768 = ((i751 << 6 + _110001) << 5 + _11111) << 4 + // i781 = 2*((_11 + i768) << 9 + _111111) + 1 + // return (i781 << 8 + _1101) << 4 + // + // Operations: 667 squares 127 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + t19 = new(Element) + t20 = new(Element) + t21 = new(Element) + t22 = new(Element) + t23 = new(Element) + t24 = new(Element) + t25 = new(Element) + t26 = new(Element) + t27 = new(Element) + t28 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24,t25,t26,t27,t28 Element + // Step 1: t0 = x^0x2 + t0.Square(&x) + + // Step 2: t1 = x^0x3 + t1.Mul(&x, t0) + + // Step 3: t2 = x^0x4 + t2.Mul(&x, t1) + + // Step 4: t7 = x^0x5 + t7.Mul(&x, t2) + + // Step 5: t26 = x^0x9 + t26.Mul(t2, t7) + + // Step 6: t20 = x^0xb + t20.Mul(t0, t26) + + // Step 7: z = x^0xd + z.Mul(t0, t20) + + // Step 8: t12 = x^0xf + t12.Mul(t0, z) + + // Step 9: t23 = x^0x11 + t23.Mul(t0, t12) + + // Step 10: t15 = x^0x15 + t15.Mul(t2, t23) + + // Step 11: t27 = x^0x17 + t27.Mul(t0, t15) + + // Step 12: t17 = x^0x19 + t17.Mul(t0, t27) + + // Step 13: t18 = x^0x1b + t18.Mul(t0, t17) + + // Step 14: t25 = x^0x1d + t25.Mul(t0, t18) + + // Step 15: t2 = x^0x1f + t2.Mul(t0, t25) + + // Step 16: t8 = x^0x21 + t8.Mul(t0, t2) + + // Step 17: t13 = x^0x23 + t13.Mul(t0, t8) + + // Step 18: t14 = x^0x25 + t14.Mul(t0, t13) + + // Step 19: t5 = x^0x27 + t5.Mul(t0, t14) + + // Step 20: t6 = x^0x29 + t6.Mul(t0, t5) + + // Step 21: t19 = x^0x2b + t19.Mul(t0, t6) + + // Step 22: t10 = x^0x2d + t10.Mul(t0, t19) + + // Step 23: t9 = x^0x2f + t9.Mul(t0, t10) + + // Step 24: t3 = x^0x31 + t3.Mul(t0, t9) + + // Step 25: t4 = x^0x33 + t4.Mul(t0, t3) + + // Step 26: t16 = x^0x35 + t16.Mul(t0, t4) + + // Step 27: t21 = x^0x37 + t21.Mul(t0, t16) + + // Step 28: t22 = 
x^0x39 + t22.Mul(t0, t21) + + // Step 29: t24 = x^0x3b + t24.Mul(t0, t22) + + // Step 30: t11 = x^0x3d + t11.Mul(t0, t24) + + // Step 31: t0 = x^0x3f + t0.Mul(t0, t11) + + // Step 32: t28 = x^0x7a + t28.Mul(t24, t0) + + // Step 36: t28 = x^0x7a0 + for s := 0; s < 4; s++ { + t28.Square(t28) + } + + // Step 37: t28 = x^0x7bb + t28.Mul(t18, t28) + + // Step 44: t28 = x^0x3dd80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 45: t28 = x^0x3ddab + t28.Mul(t19, t28) + + // Step 52: t28 = x^0x1eed580 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 53: t28 = x^0x1eed5b7 + t28.Mul(t21, t28) + + // Step 60: t28 = x^0xf76adb80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 61: t28 = x^0xf76adbb5 + t28.Mul(t16, t28) + + // Step 66: t28 = x^0x1eed5b76a0 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 67: t28 = x^0x1eed5b76b7 + t28.Mul(t27, t28) + + // Step 74: t28 = x^0xf76adbb5b80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 75: t28 = x^0xf76adbb5bb9 + t28.Mul(t22, t28) + + // Step 80: t28 = x^0x1eed5b76b7720 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 81: t28 = x^0x1eed5b76b7731 + t28.Mul(t23, t28) + + // Step 87: t28 = x^0x7bb56ddaddcc40 + for s := 0; s < 6; s++ { + t28.Square(t28) + } + + // Step 88: t27 = x^0x7bb56ddaddcc57 + t27.Mul(t27, t28) + + // Step 96: t27 = x^0x7bb56ddaddcc5700 + for s := 0; s < 8; s++ { + t27.Square(t27) + } + + // Step 97: t27 = x^0x7bb56ddaddcc5715 + t27.Mul(t15, t27) + + // Step 100: t27 = x^0x3ddab6ed6ee62b8a8 + for s := 0; s < 3; s++ { + t27.Square(t27) + } + + // Step 101: t27 = x^0x3ddab6ed6ee62b8ab + t27.Mul(t1, t27) + + // Step 110: t27 = x^0x7bb56ddaddcc5715600 + for s := 0; s < 9; s++ { + t27.Square(t27) + } + + // Step 111: t26 = x^0x7bb56ddaddcc5715609 + t26.Mul(t26, t27) + + // Step 119: t26 = x^0x7bb56ddaddcc571560900 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 120: t26 = x^0x7bb56ddaddcc57156093f + t26.Mul(t0, t26) + + // Step 
128: t26 = x^0x7bb56ddaddcc57156093f00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 129: t26 = x^0x7bb56ddaddcc57156093f0f + t26.Mul(t12, t26) + + // Step 138: t26 = x^0xf76adbb5bb98ae2ac127e1e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 139: t26 = x^0xf76adbb5bb98ae2ac127e1e35 + t26.Mul(t16, t26) + + // Step 144: t26 = x^0x1eed5b76b77315c55824fc3c6a0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 145: t26 = x^0x1eed5b76b77315c55824fc3c6ad + t26.Mul(z, t26) + + // Step 154: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 155: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33 + t26.Mul(t4, t26) + + // Step 161: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cc0 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 162: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cf5 + t26.Mul(t16, t26) + + // Step 167: t26 = x^0x1eed5b76b77315c55824fc3c6ad19ea0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 168: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb9 + t26.Mul(t17, t26) + + // Step 176: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb900 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 177: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f + t26.Mul(t9, t26) + + // Step 186: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 187: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e33 + t26.Mul(t4, t26) + + // Step 194: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f1980 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 195: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5 + t26.Mul(t14, t26) + + // Step 201: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc66940 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 202: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d + t26.Mul(t11, t26) + + // Step 205: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334be8 + for s := 0; s < 3; s++ { + t26.Square(t26) + } + + // Step 206: t26 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb + t26.Mul(t1, t26) + + // Step 214: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 215: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b + t26.Mul(t20, t26) + + // Step 222: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f58580 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 223: t25 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d + t25.Mul(t25, t26) + + // Step 232: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a00 + for s := 0; s < 9; s++ { + t25.Square(t25) + } + + // Step 233: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27 + t25.Mul(t5, t25) + + // Step 239: t25 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89c0 + for s := 0; s < 6; s++ { + t25.Square(t25) + } + + // Step 240: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb + t24.Mul(t24, t25) + + // Step 244: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb0 + for s := 0; s < 4; s++ { + t24.Square(t24) + } + + // Step 245: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf + t24.Mul(t12, t24) + + // Step 253: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf00 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 254: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23 + t24.Mul(t13, t24) + + // Step 261: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9180 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 262: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9191 + t24.Mul(t23, t24) + + // Step 269: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c880 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 270: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c885 + t24.Mul(t7, t24) + + // Step 278: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88500 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 279: t24 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515 + t24.Mul(t15, t24) + + // Step 285: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214540 + for s := 0; s < 6; s++ { + t24.Square(t24) + } + + // Step 286: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214551 + t23.Mul(t23, t24) + + // Step 293: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a880 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 294: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1 + t23.Mul(t3, t23) + + // Step 298: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b10 + for s := 0; s < 4; s++ { + t23.Square(t23) + } + + // Step 299: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d + t23.Mul(z, t23) + + // Step 306: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 307: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b + t23.Mul(t18, t23) + + // Step 315: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b00 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 316: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b33 + t23.Mul(t4, t23) + + // Step 325: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 326: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635 + t23.Mul(t16, t23) + + // Step 333: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331a80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 334: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9 + t23.Mul(t22, t23) + + // Step 340: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 341: t23 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73 + t23.Mul(t4, t23) + + // Step 347: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cc0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 348: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf1 + t23.Mul(t3, t23) + + // Step 357: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e200 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 358: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215 + t23.Mul(t15, t23) + + // Step 366: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21500 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 367: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21523 + t23.Mul(t13, t23) + + // Step 373: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 374: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db + t23.Mul(t18, t23) + + // Step 382: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db00 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 383: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d + t23.Mul(t11, t23) + + // Step 386: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9e8 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 387: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb + t23.Mul(t1, t23) + + // Step 397: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac00 + for s := 0; s < 10; s++ { + t23.Square(t23) + } + + // Step 398: t23 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b + t23.Mul(t20, t23) + + // Step 401: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6058 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 402: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059 + t23.Mul(&x, t23) + + // Step 414: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 415: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025 + t23.Mul(t14, t23) + + // Step 421: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640940 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 422: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975 + t23.Mul(t16, t23) + + // Step 434: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 435: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027 + t23.Mul(t5, t23) + + // Step 441: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 442: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f5 + t23.Mul(t16, t23) + + // Step 448: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 449: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55 + t23.Mul(t15, t23) + + // Step 460: t23 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa800 + for s := 0; s < 11; s++ { + t23.Square(t23) + } + + // Step 461: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829 + t23.Mul(t6, t23) + + // Step 466: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d550520 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 467: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f + t23.Mul(t2, t23) + + // Step 472: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7e0 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 473: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb + t23.Mul(t20, t23) + + // Step 482: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 483: t22 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd639 + t22.Mul(t22, t23) + + // Step 490: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1c80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 491: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3 + t22.Mul(t4, t22) + + // Step 495: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb30 + for s := 0; s < 4; s++ { + t22.Square(t22) + } + + // Step 496: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d + t22.Mul(z, t22) + + // Step 507: t22 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e800 + for s := 0; s < 11; s++ { + t22.Square(t22) + } + + // Step 508: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e837 + t22.Mul(t21, t22) + + // Step 515: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 516: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b99 + t22.Mul(t17, t22) + + // Step 525: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373200 + for s := 0; s < 9; s++ { + t22.Square(t22) + } + + // Step 526: t21 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237 + t21.Mul(t21, t22) + + // Step 535: t21 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e00 + for s := 0; s < 9; s++ { + t21.Square(t21) + } + + // Step 536: t21 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e29 + t21.Mul(t6, t21) + + // Step 542: t21 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a40 + for s := 0; s < 6; s++ { + t21.Square(t21) + } + + // Step 543: t20 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b + t20.Mul(t20, t21) + + // Step 549: t20 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292c0 + for s := 0; s < 6; s++ { + t20.Square(t20) + } + + // Step 550: t20 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd + t20.Mul(z, t20) + + // Step 559: t20 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a00 + for s := 0; s < 9; s++ { + t20.Square(t20) + } + + // Step 560: t19 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2b + t19.Mul(t19, t20) + + // Step 565: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b34560 + for s := 0; s < 5; s++ { + t19.Square(t19) + } + + // Step 566: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b + t19.Mul(t18, t19) + + // Step 572: t19 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15ec0 + for s := 0; s < 6; s++ { + t19.Square(t19) + } + + // Step 573: t18 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb + t18.Mul(t18, t19) + + // Step 579: t18 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 580: t17 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d9 + t17.Mul(t17, t18) + + // Step 589: t17 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db200 + for s := 0; s < 9; s++ { + t17.Square(t17) + } + + // Step 590: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db235 + t16.Mul(t16, t17) + + // Step 597: t16 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a80 + for s := 0; s < 7; s++ { + t16.Square(t16) + } + + // Step 598: t16 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a95 + t16.Mul(t15, t16) + + // Step 604: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a540 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 605: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a543 + t16.Mul(t1, t16) + + // Step 616: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1800 + for s := 0; s < 11; s++ { + t16.Square(t16) + } + + // Step 617: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1815 + t15.Mul(t15, t16) + + // Step 621: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18150 + for s := 0; s < 4; s++ { + t15.Square(t15) + } + + // Step 622: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18155 + t15.Mul(t7, t15) + + // Step 629: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa80 + for s := 0; s < 7; s++ { + t15.Square(t15) + } + + // Step 630: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f + t15.Mul(t12, t15) + + // Step 640: t15 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c00 + for s := 0; s < 10; s++ { + t15.Square(t15) + } + + // Step 641: t14 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c25 + t14.Mul(t14, t15) + + // Step 647: t14 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0940 + for s := 0; s < 6; s++ { + t14.Square(t14) + } + + // Step 648: t13 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963 + t13.Mul(t13, t14) + + // Step 653: t13 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c60 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 654: t12 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f + t12.Mul(t12, t13) + + // Step 661: t12 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963780 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 662: t12 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f + t12.Mul(t2, t12) + + // Step 669: t12 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcf80 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 670: t11 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbd + t11.Mul(t11, t12) + + // Step 
673: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7de8 + for s := 0; s < 3; s++ { + t11.Square(t11) + } + + // Step 674: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded + t11.Mul(t7, t11) + + // Step 684: t11 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b400 + for s := 0; s < 10; s++ { + t11.Square(t11) + } + + // Step 685: t10 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42d + t10.Mul(t10, t11) + + // Step 688: t10 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda168 + for s := 0; s < 3; s++ { + t10.Square(t10) + } + + // Step 689: t10 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d + t10.Mul(t7, t10) + + // Step 699: t10 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b400 + for s := 0; s < 10; s++ { + t10.Square(t10) + } + + // Step 700: t9 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f + t9.Mul(t9, t10) + + // Step 707: t9 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da1780 + for s := 0; s < 7; s++ { + t9.Square(t9) + } + + // Step 708: t8 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1 + t8.Mul(t8, t9) + + // Step 711: t8 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd08 + for s := 0; s < 3; s++ { + t8.Square(t8) + } + + // Step 712: t7 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d + t7.Mul(t7, t8) + + // Step 722: t7 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43400 + for s := 0; s < 10; s++ { + t7.Square(t7) + } + + // Step 723: t6 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429 + t6.Mul(t6, t7) + + // Step 731: t6 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342900 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 732: t5 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342927 + t5.Mul(t5, t6) + + // Step 735: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a14938 + for s := 0; s < 3; s++ { + t5.Square(t5) + } + + // Step 736: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b + t5.Mul(t1, t5) + + // Step 750: t5 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec000 + for s := 0; s < 14; s++ { + t5.Square(t5) + } + + // Step 751: t4 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033 + t4.Mul(t4, t5) + + // Step 757: t4 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cc0 + for s := 0; s < 6; s++ { + t4.Square(t4) + } + + // Step 758: t3 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1 + t3.Mul(t3, t4) + + // Step 763: t3 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e20 + for s := 0; s < 5; s++ { + t3.Square(t3) + } + + // Step 764: t2 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f + t2.Mul(t2, t3) + + // Step 768: t2 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f0 + for s := 0; s < 4; s++ { + t2.Square(t2) + } + + // Step 769: t1 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f3 + t1.Mul(t1, t2) + + // Step 778: t1 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e600 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 779: t0 
= x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f + t0.Mul(t0, t1) + + // Step 780: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7e + t0.Square(t0) + + // Step 781: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f + t0.Mul(&x, t0) + + // Step 789: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f00 + for s := 0; s < 8; s++ { + t0.Square(t0) + } + + // Step 790: z = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d + z.Mul(z, t0) + + // Step 794: z = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 + for s := 0; s < 4; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _100 = 1 + _11 + // _101 = 1 + _100 + // _1001 = _100 + _101 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _10001 = _10 + _1111 + // _10101 = _100 + _10001 + // _10111 = _10 + _10101 + // _11001 = 
_10 + _10111 + // _11011 = _10 + _11001 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _100001 = _10 + _11111 + // _100011 = _10 + _100001 + // _100101 = _10 + _100011 + // _100111 = _10 + _100101 + // _101001 = _10 + _100111 + // _101011 = _10 + _101001 + // _101101 = _10 + _101011 + // _101111 = _10 + _101101 + // _110001 = _10 + _101111 + // _110011 = _10 + _110001 + // _110101 = _10 + _110011 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111010 = _111011 + _111111 + // i52 = ((_1111010 << 4 + _11011) << 7 + _101011) << 7 + // i67 = ((_110111 + i52) << 7 + _110101) << 5 + _10111 + // i87 = ((i67 << 7 + _111001) << 5 + _10001) << 6 + // i101 = ((_10111 + i87) << 8 + _10101) << 3 + _11 + // i128 = ((i101 << 9 + _1001) << 8 + _111111) << 8 + // i145 = ((_1111 + i128) << 9 + _110101) << 5 + _1101 + // i167 = ((i145 << 9 + _110011) << 6 + _110101) << 5 + // i187 = ((_11001 + i167) << 8 + _101111) << 9 + _110011 + // i205 = ((i187 << 7 + _100101) << 6 + _111101) << 3 + // i223 = ((_11 + i205) << 8 + _1011) << 7 + _11101 + // i244 = ((i223 << 9 + _100111) << 6 + _111011) << 4 + // i262 = ((_1111 + i244) << 8 + _100011) << 7 + _10001 + // i285 = ((i262 << 7 + _101) << 8 + _10101) << 6 + // i299 = ((_10001 + i285) << 7 + _110001) << 4 + _1101 + // i325 = ((i299 << 7 + _11011) << 8 + _110011) << 9 + // i341 = ((_110101 + i325) << 7 + _111001) << 6 + _110011 + // i366 = ((i341 << 6 + _110001) << 9 + _10101) << 8 + // i383 = ((_100011 + i366) << 6 + _11011) << 8 + _111101 + // i401 = ((i383 << 3 + _11) << 10 + _1011) << 3 + // i422 = ((1 + i401) << 12 + _100101) << 6 + _110101 + // i448 = ((i422 << 12 + _100111) << 6 + _110101) << 6 + // i467 = ((_10101 + i448) << 11 + _101001) << 5 + _11111 + // i490 = ((i467 << 5 + _1011) << 9 + _111001) << 7 + // i508 = ((_110011 + i490) << 4 + _1101) << 11 + _110111 + // i535 = ((i508 << 7 + _11001) << 9 + _110111) << 9 
+ // i550 = ((_101001 + i535) << 6 + _1011) << 6 + _1101 + // i572 = ((i550 << 9 + _101011) << 5 + _11011) << 6 + // i590 = ((_11011 + i572) << 6 + _11001) << 9 + _110101 + // i616 = ((i590 << 7 + _10101) << 6 + _11) << 11 + // i630 = ((_10101 + i616) << 4 + _101) << 7 + _1111 + // i653 = ((i630 << 10 + _100101) << 6 + _100011) << 5 + // i670 = ((_1111 + i653) << 7 + _11111) << 7 + _111101 + // i688 = ((i670 << 3 + _101) << 10 + _101101) << 3 + // i708 = ((_101 + i688) << 10 + _101111) << 7 + _100001 + // i731 = ((i708 << 3 + _101) << 10 + _101001) << 8 + // i751 = ((_100111 + i731) << 3 + _11) << 14 + _110011 + // i768 = ((i751 << 6 + _110001) << 5 + _11111) << 4 + // i781 = 2*((_11 + i768) << 9 + _111111) + 1 + // return ((i781 << 8 + _1101) << 5 + 1) << 81 + // + // Operations: 749 squares 128 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + t19 = new(Element) + t20 = new(Element) + t21 = new(Element) + t22 = new(Element) + t23 = new(Element) + t24 = new(Element) + t25 = new(Element) + t26 = new(Element) + t27 = new(Element) + t28 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24,t25,t26,t27,t28 Element + // Step 1: t0 = x^0x2 + t0.Square(&x) + + // Step 2: t1 = x^0x3 + t1.Mul(&x, t0) + + // Step 3: t2 = x^0x4 + t2.Mul(&x, t1) + + // Step 4: t7 = x^0x5 + t7.Mul(&x, t2) + + // Step 5: t26 = x^0x9 + t26.Mul(t2, t7) + + // Step 6: t20 = x^0xb + t20.Mul(t0, t26) + + // Step 7: z = x^0xd + z.Mul(t0, t20) + + // Step 8: t12 = x^0xf + t12.Mul(t0, z) + + // Step 9: t23 = x^0x11 + t23.Mul(t0, t12) + + // Step 10: t15 = 
x^0x15 + t15.Mul(t2, t23) + + // Step 11: t27 = x^0x17 + t27.Mul(t0, t15) + + // Step 12: t17 = x^0x19 + t17.Mul(t0, t27) + + // Step 13: t18 = x^0x1b + t18.Mul(t0, t17) + + // Step 14: t25 = x^0x1d + t25.Mul(t0, t18) + + // Step 15: t2 = x^0x1f + t2.Mul(t0, t25) + + // Step 16: t8 = x^0x21 + t8.Mul(t0, t2) + + // Step 17: t13 = x^0x23 + t13.Mul(t0, t8) + + // Step 18: t14 = x^0x25 + t14.Mul(t0, t13) + + // Step 19: t5 = x^0x27 + t5.Mul(t0, t14) + + // Step 20: t6 = x^0x29 + t6.Mul(t0, t5) + + // Step 21: t19 = x^0x2b + t19.Mul(t0, t6) + + // Step 22: t10 = x^0x2d + t10.Mul(t0, t19) + + // Step 23: t9 = x^0x2f + t9.Mul(t0, t10) + + // Step 24: t3 = x^0x31 + t3.Mul(t0, t9) + + // Step 25: t4 = x^0x33 + t4.Mul(t0, t3) + + // Step 26: t16 = x^0x35 + t16.Mul(t0, t4) + + // Step 27: t21 = x^0x37 + t21.Mul(t0, t16) + + // Step 28: t22 = x^0x39 + t22.Mul(t0, t21) + + // Step 29: t24 = x^0x3b + t24.Mul(t0, t22) + + // Step 30: t11 = x^0x3d + t11.Mul(t0, t24) + + // Step 31: t0 = x^0x3f + t0.Mul(t0, t11) + + // Step 32: t28 = x^0x7a + t28.Mul(t24, t0) + + // Step 36: t28 = x^0x7a0 + for s := 0; s < 4; s++ { + t28.Square(t28) + } + + // Step 37: t28 = x^0x7bb + t28.Mul(t18, t28) + + // Step 44: t28 = x^0x3dd80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 45: t28 = x^0x3ddab + t28.Mul(t19, t28) + + // Step 52: t28 = x^0x1eed580 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 53: t28 = x^0x1eed5b7 + t28.Mul(t21, t28) + + // Step 60: t28 = x^0xf76adb80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 61: t28 = x^0xf76adbb5 + t28.Mul(t16, t28) + + // Step 66: t28 = x^0x1eed5b76a0 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 67: t28 = x^0x1eed5b76b7 + t28.Mul(t27, t28) + + // Step 74: t28 = x^0xf76adbb5b80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 75: t28 = x^0xf76adbb5bb9 + t28.Mul(t22, t28) + + // Step 80: t28 = x^0x1eed5b76b7720 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 81: t28 = 
x^0x1eed5b76b7731 + t28.Mul(t23, t28) + + // Step 87: t28 = x^0x7bb56ddaddcc40 + for s := 0; s < 6; s++ { + t28.Square(t28) + } + + // Step 88: t27 = x^0x7bb56ddaddcc57 + t27.Mul(t27, t28) + + // Step 96: t27 = x^0x7bb56ddaddcc5700 + for s := 0; s < 8; s++ { + t27.Square(t27) + } + + // Step 97: t27 = x^0x7bb56ddaddcc5715 + t27.Mul(t15, t27) + + // Step 100: t27 = x^0x3ddab6ed6ee62b8a8 + for s := 0; s < 3; s++ { + t27.Square(t27) + } + + // Step 101: t27 = x^0x3ddab6ed6ee62b8ab + t27.Mul(t1, t27) + + // Step 110: t27 = x^0x7bb56ddaddcc5715600 + for s := 0; s < 9; s++ { + t27.Square(t27) + } + + // Step 111: t26 = x^0x7bb56ddaddcc5715609 + t26.Mul(t26, t27) + + // Step 119: t26 = x^0x7bb56ddaddcc571560900 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 120: t26 = x^0x7bb56ddaddcc57156093f + t26.Mul(t0, t26) + + // Step 128: t26 = x^0x7bb56ddaddcc57156093f00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 129: t26 = x^0x7bb56ddaddcc57156093f0f + t26.Mul(t12, t26) + + // Step 138: t26 = x^0xf76adbb5bb98ae2ac127e1e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 139: t26 = x^0xf76adbb5bb98ae2ac127e1e35 + t26.Mul(t16, t26) + + // Step 144: t26 = x^0x1eed5b76b77315c55824fc3c6a0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 145: t26 = x^0x1eed5b76b77315c55824fc3c6ad + t26.Mul(z, t26) + + // Step 154: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 155: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33 + t26.Mul(t4, t26) + + // Step 161: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cc0 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 162: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cf5 + t26.Mul(t16, t26) + + // Step 167: t26 = x^0x1eed5b76b77315c55824fc3c6ad19ea0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 168: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb9 + t26.Mul(t17, t26) + + // Step 176: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb900 + for s := 0; s < 8; s++ { 
+ t26.Square(t26) + } + + // Step 177: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f + t26.Mul(t9, t26) + + // Step 186: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 187: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e33 + t26.Mul(t4, t26) + + // Step 194: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f1980 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 195: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5 + t26.Mul(t14, t26) + + // Step 201: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc66940 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 202: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d + t26.Mul(t11, t26) + + // Step 205: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334be8 + for s := 0; s < 3; s++ { + t26.Square(t26) + } + + // Step 206: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb + t26.Mul(t1, t26) + + // Step 214: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 215: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b + t26.Mul(t20, t26) + + // Step 222: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f58580 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 223: t25 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d + t25.Mul(t25, t26) + + // Step 232: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a00 + for s := 0; s < 9; s++ { + t25.Square(t25) + } + + // Step 233: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27 + t25.Mul(t5, t25) + + // Step 239: t25 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89c0 + for s := 0; s < 6; s++ { + t25.Square(t25) + } + + // Step 240: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb + t24.Mul(t24, t25) + + // Step 244: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb0 + for s := 0; s < 4; s++ { + t24.Square(t24) + } + + // Step 245: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf + t24.Mul(t12, t24) + + // Step 
253: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf00 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 254: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23 + t24.Mul(t13, t24) + + // Step 261: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9180 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 262: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9191 + t24.Mul(t23, t24) + + // Step 269: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c880 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 270: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c885 + t24.Mul(t7, t24) + + // Step 278: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88500 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 279: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515 + t24.Mul(t15, t24) + + // Step 285: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214540 + for s := 0; s < 6; s++ { + t24.Square(t24) + } + + // Step 286: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214551 + t23.Mul(t23, t24) + + // Step 293: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a880 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 294: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1 + t23.Mul(t3, t23) + + // Step 298: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b10 + for s := 0; s < 4; s++ { + t23.Square(t23) + } + + // Step 299: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d + t23.Mul(z, t23) + + // Step 306: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 307: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b + t23.Mul(t18, t23) + + // Step 315: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b00 + for s := 0; s < 8; 
s++ { + t23.Square(t23) + } + + // Step 316: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b33 + t23.Mul(t4, t23) + + // Step 325: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 326: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635 + t23.Mul(t16, t23) + + // Step 333: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331a80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 334: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9 + t23.Mul(t22, t23) + + // Step 340: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 341: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73 + t23.Mul(t4, t23) + + // Step 347: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cc0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 348: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf1 + t23.Mul(t3, t23) + + // Step 357: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e200 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 358: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215 + t23.Mul(t15, t23) + + // Step 366: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21500 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 367: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21523 + t23.Mul(t13, t23) + + // Step 373: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 374: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db + 
t23.Mul(t18, t23) + + // Step 382: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db00 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 383: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d + t23.Mul(t11, t23) + + // Step 386: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9e8 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 387: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb + t23.Mul(t1, t23) + + // Step 397: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac00 + for s := 0; s < 10; s++ { + t23.Square(t23) + } + + // Step 398: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b + t23.Mul(t20, t23) + + // Step 401: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6058 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 402: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059 + t23.Mul(&x, t23) + + // Step 414: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 415: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025 + t23.Mul(t14, t23) + + // Step 421: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640940 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 422: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975 + t23.Mul(t16, t23) + + // Step 434: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 435: t23 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027 + t23.Mul(t5, t23) + + // Step 441: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 442: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f5 + t23.Mul(t16, t23) + + // Step 448: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 449: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55 + t23.Mul(t15, t23) + + // Step 460: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa800 + for s := 0; s < 11; s++ { + t23.Square(t23) + } + + // Step 461: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829 + t23.Mul(t6, t23) + + // Step 466: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d550520 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 467: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f + t23.Mul(t2, t23) + + // Step 472: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7e0 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 473: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb + t23.Mul(t20, t23) + + // Step 482: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 483: t22 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd639 + 
t22.Mul(t22, t23) + + // Step 490: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1c80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 491: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3 + t22.Mul(t4, t22) + + // Step 495: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb30 + for s := 0; s < 4; s++ { + t22.Square(t22) + } + + // Step 496: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d + t22.Mul(z, t22) + + // Step 507: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e800 + for s := 0; s < 11; s++ { + t22.Square(t22) + } + + // Step 508: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e837 + t22.Mul(t21, t22) + + // Step 515: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 516: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b99 + t22.Mul(t17, t22) + + // Step 525: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373200 + for s := 0; s < 9; s++ { + t22.Square(t22) + } + + // Step 526: t21 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237 + t21.Mul(t21, t22) + + // Step 535: t21 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e00 + for s := 0; s < 9; s++ { + t21.Square(t21) + } + + // Step 536: t21 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e29 + t21.Mul(t6, t21) + + // Step 542: t21 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a40 + for s := 0; s < 6; s++ { + t21.Square(t21) + } + + // Step 543: t20 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b + t20.Mul(t20, t21) + + // Step 549: t20 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292c0 + for s := 0; s < 6; s++ { + t20.Square(t20) + } + + // Step 550: t20 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd + t20.Mul(z, t20) + + // Step 559: t20 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a00 + for s := 0; s < 9; s++ { + t20.Square(t20) + } + + // Step 560: t19 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2b + t19.Mul(t19, t20) + + // Step 565: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b34560 + for s := 0; s < 5; s++ { + t19.Square(t19) + } + + // Step 566: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b + t19.Mul(t18, t19) + + // Step 572: t19 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15ec0 + for s := 0; s < 6; s++ { + t19.Square(t19) + } + + // Step 573: t18 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb + t18.Mul(t18, t19) + + // Step 579: t18 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 580: t17 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d9 + t17.Mul(t17, t18) + + // Step 589: t17 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db200 + for s := 0; s < 9; s++ { + t17.Square(t17) + } + + // Step 590: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db235 + t16.Mul(t16, t17) + + // Step 597: t16 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a80 + for s := 0; s < 7; s++ { + t16.Square(t16) + } + + // Step 598: t16 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a95 + t16.Mul(t15, t16) + + // Step 604: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a540 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 605: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a543 + t16.Mul(t1, t16) + + // Step 616: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1800 + for s := 0; s < 11; s++ { + t16.Square(t16) + } + + // Step 617: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1815 + t15.Mul(t15, t16) + + // Step 621: t15 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18150 + for s := 0; s < 4; s++ { + t15.Square(t15) + } + + // Step 622: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18155 + t15.Mul(t7, t15) + + // Step 629: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa80 + for s := 0; s < 7; s++ { + t15.Square(t15) + } + + // Step 630: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f + t15.Mul(t12, t15) + + // Step 640: t15 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c00 + for s := 0; s < 10; s++ { + t15.Square(t15) + } + + // Step 641: t14 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c25 + t14.Mul(t14, t15) + + // Step 647: t14 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0940 + for s := 0; s < 6; s++ { + t14.Square(t14) + } + + // Step 648: t13 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963 + t13.Mul(t13, t14) + + // Step 653: t13 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c60 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 654: t12 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f + t12.Mul(t12, t13) + + // Step 661: t12 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963780 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 662: t12 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f + t12.Mul(t2, t12) + + // Step 669: t12 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcf80 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 670: t11 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbd + t11.Mul(t11, t12) + + // Step 673: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7de8 + for s := 0; s < 3; s++ { + t11.Square(t11) + } + + // Step 674: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded + t11.Mul(t7, t11) + + // Step 684: t11 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b400 + for s := 0; s < 10; s++ { + t11.Square(t11) + } + + // Step 685: t10 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42d + t10.Mul(t10, t11) + + // Step 688: t10 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda168 + for s := 0; s < 3; s++ { + t10.Square(t10) + } + + // Step 689: t10 = 
x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d + t10.Mul(t7, t10) + + // Step 699: t10 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b400 + for s := 0; s < 10; s++ { + t10.Square(t10) + } + + // Step 700: t9 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f + t9.Mul(t9, t10) + + // Step 707: t9 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da1780 + for s := 0; s < 7; s++ { + t9.Square(t9) + } + + // Step 708: t8 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1 + t8.Mul(t8, t9) + + // Step 711: t8 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd08 + for s := 0; s < 3; s++ { + t8.Square(t8) + } + + // Step 712: t7 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d + t7.Mul(t7, t8) + + // Step 722: t7 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43400 + for s := 0; s < 10; s++ { + t7.Square(t7) + } + + // Step 723: t6 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429 + t6.Mul(t6, t7) + + // Step 731: t6 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342900 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 732: t5 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342927 + t5.Mul(t5, t6) + + // Step 735: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a14938 + for s := 0; s < 3; s++ { + t5.Square(t5) + } + + // Step 736: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b + t5.Mul(t1, t5) + + // Step 750: t5 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec000 + for s := 0; s < 14; s++ { + t5.Square(t5) + } + + // Step 751: t4 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033 + t4.Mul(t4, t5) + + // Step 757: t4 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cc0 + for s := 0; s < 6; s++ { + t4.Square(t4) + } + + // Step 758: t3 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1 + t3.Mul(t3, t4) + + // Step 763: t3 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e20 + for s := 0; s < 5; s++ { + t3.Square(t3) + } + + // Step 764: t2 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f + t2.Mul(t2, t3) + + // Step 768: t2 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f0 + for s := 0; s < 4; s++ { + t2.Square(t2) + } + + // Step 769: t1 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f3 + t1.Mul(t1, t2) + + // Step 778: t1 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e600 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 779: t0 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f + t0.Mul(t0, t1) + + // Step 780: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7e + t0.Square(t0) + + // Step 781: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f + t0.Mul(&x, t0) + + // Step 789: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f00 + for s := 0; s < 8; s++ { + t0.Square(t0) + } + + // Step 790: z = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d + z.Mul(z, t0) + + // Step 795: z = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1f98fe1a0 + for s := 0; s < 5; s++ { + z.Square(z) + } + + // Step 796: z = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1f98fe1a1 + z.Mul(&x, z) + + // Step 877: z = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 + for s := 0; s < 81; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bw6-756/fp/element_fuzz.go b/ecc/bw6-756/fp/element_fuzz.go new file mode 100644 index 000000000..19d12cce9 --- /dev/null +++ b/ecc/bw6-756/fp/element_fuzz.go @@ -0,0 +1,200 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+package fp
+
+import (
+	"bytes"
+	"encoding/binary"
+	"io"
+	"math/big"
+	"math/bits"
+)
+
+const (
+	fuzzInteresting = 1
+	fuzzNormal      = 0
+	fuzzDiscard     = -1
+)
+
+// Fuzz cross-checks Element arithmetic: asm vs generic mul, Inverse vs big.Int, and a + (-a) == 0.
+func Fuzz(data []byte) int {
+	r := bytes.NewReader(data)
+
+	var e1, e2 Element
+	e1.SetRawBytes(r)
+	e2.SetRawBytes(r)
+
+	{
+		// assembly Mul and the pure-Go _mulGeneric must agree on the same operands
+
+		var c, _c Element
+		a, _a, b, _b := e1, e1, e2, e2
+		c.Mul(&a, &b)
+		_mulGeneric(&_c, &_a, &_b)
+
+		if !c.Equal(&_c) {
+			panic("mul asm != mul generic on Element")
+		}
+	}
+
+	{
+		// Inverse must match big.Int ModInverse modulo q
+		inv := e1
+		inv.Inverse(&inv)
+
+		var bInv, b1, b2 big.Int
+		e1.ToBigIntRegular(&b1)
+		bInv.ModInverse(&b1, Modulus())
+		inv.ToBigIntRegular(&b2)
+
+		if b2.Cmp(&bInv) != 0 {
+			panic("inverse operation doesn't match big int result")
+		}
+	}
+
+	{
+		// additive inverse: a + (-a) must be the zero element
+		a, b := e1, e1
+		b.Neg(&b)
+		a.Add(&a, &b)
+		if !a.IsZero() {
+			panic("a + -a != 0")
+		}
+	}
+
+	return fuzzNormal
+
+}
+
+// SetRawBytes reads up to 8*len(z) bytes from r, interpreting each 8-byte
+// chunk as a big-endian uint64 limb of z, then reduces the result below q.
+// Used for fuzzing purposes only; on a short read the remaining limbs keep their prior values.
+func (z *Element) SetRawBytes(r io.Reader) {
+
+	buf := make([]byte, 8)
+
+	for i := 0; i < len(z); i++ {
+		if _, err := io.ReadFull(r, buf); err != nil {
+			goto eof
+		}
+		z[i] = binary.BigEndian.Uint64(buf[:])
+	}
+eof:
+	z[11] %= qElement[11] // reduce the top limb modulo q's top limb before the conditional subtract below
+
+	if z.BiggerModulus() {
+		var b uint64
+		z[0], b = bits.Sub64(z[0], qElement[0], 0)
+		z[1], b = bits.Sub64(z[1], qElement[1], b)
+		z[2], b = bits.Sub64(z[2], qElement[2], b)
+		z[3], b = bits.Sub64(z[3], qElement[3], b)
+		z[4], b = bits.Sub64(z[4], qElement[4], b)
+		z[5], b = bits.Sub64(z[5], qElement[5], b)
+		z[6], b = bits.Sub64(z[6], qElement[6], b)
+		z[7], b = bits.Sub64(z[7], qElement[7], b)
+		z[8], b = bits.Sub64(z[8], qElement[8], b)
+		z[9], b = bits.Sub64(z[9], qElement[9], b)
+		z[10], b = bits.Sub64(z[10], qElement[10], b)
+		z[11], b = bits.Sub64(z[11], qElement[11], b)
+	}
+
+	return
+}
+
+// BiggerModulus reports whether z >= q, comparing limbs from most to least significant.
+func (z *Element) BiggerModulus() bool {
+	if z[11] > qElement[11] {
+		return true
+	}
+	if z[11] < qElement[11] {
+		return false
+	}
+
+	if z[10] > qElement[10] {
+		return true
+	}
+	if z[10] < qElement[10] {
+		return false
+	}
+
+	if z[9] > qElement[9] {
+		return true
+	}
+	if z[9] < qElement[9] {
+		return false
+	}
+
+	if z[8] > qElement[8] {
+		return true
+	}
+	if z[8] < qElement[8] {
+		return false
+	}
+
+	if z[7] > qElement[7] {
+		return true
+	}
+	if z[7] < qElement[7] {
+		return false
+	}
+
+	if z[6] > qElement[6] {
+		return true
+	}
+	if z[6] < qElement[6] {
+		return false
+	}
+
+	if z[5] > qElement[5] {
+		return true
+	}
+	if z[5] < qElement[5] {
+		return false
+	}
+
+	if z[4] > qElement[4] {
+		return true
+	}
+	if z[4] < qElement[4] {
+		return false
+	}
+
+	if z[3] > qElement[3] {
+		return true
+	}
+	if z[3] < qElement[3] {
+		return false
+	}
+
+	if z[2] > qElement[2] {
+		return true
+	}
+	if z[2] < qElement[2] {
+		return false
+	}
+
+	if z[1] > qElement[1] {
+		return true
+	}
+	if z[1] < qElement[1] {
+		return false
+	}
+
+	return z[0] >= qElement[0]
+}
diff --git a/ecc/bw6-756/fp/element_mul_adx_amd64.s b/ecc/bw6-756/fp/element_mul_adx_amd64.s
new file mode 100644
index 000000000..689a4e512
--- /dev/null
+++ b/ecc/bw6-756/fp/element_mul_adx_amd64.s
@@ -0,0 +1,2739 @@
+// +build amd64_adx
+
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $1 +DATA q<>+8(SB)/8, $0x33c7e63f86840000 +DATA q<>+16(SB)/8, $0xd0b685e868524ec0 +DATA q<>+24(SB)/8, $0x4302aa3c258de7de +DATA q<>+32(SB)/8, $0xe292cd15edb646a5 +DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646 +DATA q<>+48(SB)/8, $0xeb02c812ea04faaa +DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9 +DATA q<>+64(SB)/8, $0xfbf23221455163a6 +DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89 +DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf +DATA q<>+88(SB)/8, $0x000f76adbb5bb98a +GLOBL q<>(SB), (RODATA+NOPTR), $96 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0xffffffffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + MOVQ ra6, rb6; \ + SBBQ q<>+48(SB), ra6; \ + MOVQ ra7, rb7; \ + SBBQ q<>+56(SB), ra7; \ + MOVQ ra8, rb8; \ + SBBQ q<>+64(SB), ra8; \ + MOVQ ra9, rb9; \ + SBBQ q<>+72(SB), ra9; \ + MOVQ ra10, rb10; \ + SBBQ q<>+80(SB), ra10; \ + MOVQ ra11, rb11; \ + SBBQ q<>+88(SB), ra11; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + CMOVQCS rb6, ra6; \ + CMOVQCS rb7, ra7; \ + CMOVQCS rb8, ra8; \ + CMOVQCS rb9, ra9; \ + CMOVQCS rb10, ra10; \ + CMOVQCS rb11, ra11; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $96-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 
to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + MOVQ x+8(FP), AX + + // x[0] -> s0-8(SP) + // x[1] -> s1-16(SP) + // x[2] -> s2-24(SP) + // x[3] -> s3-32(SP) + // x[4] -> s4-40(SP) + // x[5] -> s5-48(SP) + // x[6] -> s6-56(SP) + // x[7] -> s7-64(SP) + // x[8] -> s8-72(SP) + // x[9] -> s9-80(SP) + // x[10] -> s10-88(SP) + // x[11] -> s11-96(SP) + MOVQ 0(AX), R14 + MOVQ 8(AX), R15 + MOVQ 16(AX), CX + MOVQ 24(AX), BX + MOVQ 32(AX), SI + MOVQ 40(AX), DI + MOVQ 48(AX), R8 + MOVQ 56(AX), R9 + MOVQ 64(AX), R10 + MOVQ 72(AX), R11 + MOVQ 80(AX), R12 + MOVQ 88(AX), R13 + MOVQ R14, s0-8(SP) + MOVQ R15, s1-16(SP) + MOVQ CX, s2-24(SP) + MOVQ BX, s3-32(SP) + MOVQ SI, s4-40(SP) + MOVQ DI, s5-48(SP) + MOVQ R8, s6-56(SP) + MOVQ R9, s7-64(SP) + MOVQ R10, s8-72(SP) + MOVQ R11, s9-80(SP) + MOVQ R12, s10-88(SP) + MOVQ R13, s11-96(SP) + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // t[6] -> R8 + // t[7] -> R9 + // t[8] -> R10 + // t[9] -> R11 + // t[10] -> R12 + // t[11] -> R13 + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 0(AX), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ s0-8(SP), R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ s1-16(SP), AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ s2-24(SP), AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ s3-32(SP), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ s4-40(SP), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ s5-48(SP), AX, R8 + ADOXQ AX, DI + + // (A,t[6]) := x[6]*y[0] + A + MULXQ s6-56(SP), AX, R9 + ADOXQ AX, R8 + + // (A,t[7]) := x[7]*y[0] + A + MULXQ s7-64(SP), AX, R10 + ADOXQ AX, R9 + + // (A,t[8]) := x[8]*y[0] + A + MULXQ s8-72(SP), AX, R11 + ADOXQ AX, R10 + + // (A,t[9]) := x[9]*y[0] + A + MULXQ s9-80(SP), AX, R12 + ADOXQ AX, R11 + + // (A,t[10]) := x[10]*y[0] + A + MULXQ s10-88(SP), AX, R13 + ADOXQ AX, R12 + + // (A,t[11]) := x[11]*y[0] + A + 
MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 8(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // 
(A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[1] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[1] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[1] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[1] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[1] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[1] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ 
q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 16(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[2] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[2] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[2] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[2] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[2] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[2] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + 
ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 24(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[3] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[3] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[3] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[3] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[3] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[3] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += 
carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 32(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ 
BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[4] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[4] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[4] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[4] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[4] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[4] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + 
// t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 40(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[5] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[5] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[5] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[5] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[5] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[5] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + 
m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 48(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[6] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[6] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[6] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[6] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[6] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[6] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[6] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[6] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[6] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[6] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[6] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[6] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, 
AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 56(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[7] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[7] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[7] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[7] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[7] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[7] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ 
AX, DI + + // (A,t[6]) := t[6] + x[6]*y[7] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[7] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[7] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[7] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[7] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[7] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, 
R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 64(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[8] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[8] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[8] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[8] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[8] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[8] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[8] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[8] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[8] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[8] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[8] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[8] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), 
AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 72(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[9] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[9] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[9] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[9] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[9] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[9] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[9] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[9] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[9] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[9] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[9] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[9] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + 
+ // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 80(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[10] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[10] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[10] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[10] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[10] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[10] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + 
x[6]*y[10] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[10] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[10] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[10] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[10] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[10] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // 
clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 88(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[11] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[11] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[11] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[11] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[11] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[11] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[11] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[11] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[11] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[11] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[11] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[11] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ 
AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET + +TEXT ·fromMont(SB), $96-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + MOVQ 48(DX), R8 + MOVQ 56(DX), R9 + MOVQ 64(DX), R10 + MOVQ 72(DX), R11 + MOVQ 80(DX), R12 + MOVQ 88(DX), R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + 
m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + 
ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, 
R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] 
+ C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + 
m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), 
AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, 
SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + 
ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET diff --git a/ecc/bw6-756/fp/element_mul_amd64.s 
b/ecc/bw6-756/fp/element_mul_amd64.s new file mode 100644 index 000000000..738f1a4b6 --- /dev/null +++ b/ecc/bw6-756/fp/element_mul_amd64.s @@ -0,0 +1,2759 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $1 +DATA q<>+8(SB)/8, $0x33c7e63f86840000 +DATA q<>+16(SB)/8, $0xd0b685e868524ec0 +DATA q<>+24(SB)/8, $0x4302aa3c258de7de +DATA q<>+32(SB)/8, $0xe292cd15edb646a5 +DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646 +DATA q<>+48(SB)/8, $0xeb02c812ea04faaa +DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9 +DATA q<>+64(SB)/8, $0xfbf23221455163a6 +DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89 +DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf +DATA q<>+88(SB)/8, $0x000f76adbb5bb98a +GLOBL q<>(SB), (RODATA+NOPTR), $96 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0xffffffffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + MOVQ ra6, rb6; \ + SBBQ q<>+48(SB), ra6; \ + MOVQ ra7, rb7; \ + SBBQ q<>+56(SB), ra7; \ + MOVQ ra8, rb8; \ + SBBQ q<>+64(SB), ra8; \ + 
MOVQ ra9, rb9; \ + SBBQ q<>+72(SB), ra9; \ + MOVQ ra10, rb10; \ + SBBQ q<>+80(SB), ra10; \ + MOVQ ra11, rb11; \ + SBBQ q<>+88(SB), ra11; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + CMOVQCS rb6, ra6; \ + CMOVQCS rb7, ra7; \ + CMOVQCS rb8, ra8; \ + CMOVQCS rb9, ra9; \ + CMOVQCS rb10, ra10; \ + CMOVQCS rb11, ra11; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $96-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), AX + + // x[0] -> s0-8(SP) + // x[1] -> s1-16(SP) + // x[2] -> s2-24(SP) + // x[3] -> s3-32(SP) + // x[4] -> s4-40(SP) + // x[5] -> s5-48(SP) + // x[6] -> s6-56(SP) + // x[7] -> s7-64(SP) + // x[8] -> s8-72(SP) + // x[9] -> s9-80(SP) + // x[10] -> s10-88(SP) + // x[11] -> s11-96(SP) + MOVQ 0(AX), R14 + MOVQ 8(AX), R15 + MOVQ 16(AX), CX + MOVQ 24(AX), BX + MOVQ 32(AX), SI + MOVQ 40(AX), DI + MOVQ 48(AX), R8 + MOVQ 56(AX), R9 + MOVQ 64(AX), R10 + MOVQ 72(AX), R11 + MOVQ 80(AX), R12 + MOVQ 88(AX), R13 + MOVQ R14, s0-8(SP) + MOVQ R15, s1-16(SP) + MOVQ CX, s2-24(SP) + MOVQ BX, s3-32(SP) + MOVQ SI, s4-40(SP) + MOVQ DI, s5-48(SP) + MOVQ R8, s6-56(SP) + MOVQ R9, s7-64(SP) + MOVQ R10, s8-72(SP) + MOVQ R11, s9-80(SP) + MOVQ R12, s10-88(SP) + MOVQ R13, s11-96(SP) + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // t[6] -> R8 + // t[7] -> R9 + // t[8] -> R10 + // t[9] -> R11 + // t[10] -> R12 + // t[11] -> R13 + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 0(AX), DX + + // (A,t[0]) 
:= x[0]*y[0] + A + MULXQ s0-8(SP), R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ s1-16(SP), AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ s2-24(SP), AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ s3-32(SP), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ s4-40(SP), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ s5-48(SP), AX, R8 + ADOXQ AX, DI + + // (A,t[6]) := x[6]*y[0] + A + MULXQ s6-56(SP), AX, R9 + ADOXQ AX, R8 + + // (A,t[7]) := x[7]*y[0] + A + MULXQ s7-64(SP), AX, R10 + ADOXQ AX, R9 + + // (A,t[8]) := x[8]*y[0] + A + MULXQ s8-72(SP), AX, R11 + ADOXQ AX, R10 + + // (A,t[9]) := x[9]*y[0] + A + MULXQ s9-80(SP), AX, R12 + ADOXQ AX, R11 + + // (A,t[10]) := x[10]*y[0] + A + MULXQ s10-88(SP), AX, R13 + ADOXQ AX, R12 + + // (A,t[11]) := x[11]*y[0] + A + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + 
ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 8(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[1] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[1] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[1] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[1] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[1] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[1] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 
+ + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 16(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[2] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[2] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[2] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[2] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[2] + 
A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[2] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 24(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 
s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[3] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[3] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[3] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[3] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[3] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[3] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + 
m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 32(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[4] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[4] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[4] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[4] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[4] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[4] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + 
ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 40(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[5] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[5] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[5] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[5] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[5] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), 
AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[5] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 48(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[6] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[6] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[6] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[6] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // 
(A,t[4]) := t[4] + x[4]*y[6] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[6] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[6] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[6] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[6] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[6] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[6] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[6] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ 
q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 56(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[7] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[7] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[7] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[7] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[7] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[7] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[7] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[7] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[7] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[7] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[7] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[7] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, 
BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 64(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[8] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[8] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[8] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[8] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[8] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[8] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[8] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[8] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[8] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[8] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[8] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // 
(A,t[11]) := t[11] + x[11]*y[8] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 72(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[9] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[9] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[9] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[9] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[9] + 
A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[9] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[9] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[9] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[9] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[9] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[9] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[9] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 
+ + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 80(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[10] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[10] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[10] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[10] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[10] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[10] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[10] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[10] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[10] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[10] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[10] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[10] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // 
(C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 88(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[11] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[11] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[11] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[11] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[11] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[11] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[11] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[11] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[11] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[11] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[11] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + 
x[11]*y[11] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ 
DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $96-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + MOVQ 48(DX), R8 + MOVQ 56(DX), R9 + MOVQ 64(DX), R10 + MOVQ 72(DX), R11 + MOVQ 80(DX), R12 + MOVQ 88(DX), R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + 
MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + 
MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, 
R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // 
(C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, 
AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + 
MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, 
R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + 
// (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bw6-756/fp/element_ops_amd64.go b/ecc/bw6-756/fp/element_ops_amd64.go new file mode 100644 index 000000000..73a3711ec --- /dev/null +++ b/ecc/bw6-756/fp/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bw6-756/fp/element_ops_amd64.s b/ecc/bw6-756/fp/element_ops_amd64.s new file mode 100644 index 000000000..2fd5939cd --- /dev/null +++ b/ecc/bw6-756/fp/element_ops_amd64.s @@ -0,0 +1,746 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $1 +DATA q<>+8(SB)/8, $0x33c7e63f86840000 +DATA q<>+16(SB)/8, $0xd0b685e868524ec0 +DATA q<>+24(SB)/8, $0x4302aa3c258de7de +DATA q<>+32(SB)/8, $0xe292cd15edb646a5 +DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646 +DATA q<>+48(SB)/8, $0xeb02c812ea04faaa +DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9 +DATA q<>+64(SB)/8, $0xfbf23221455163a6 +DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89 +DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf +DATA q<>+88(SB)/8, $0x000f76adbb5bb98a +GLOBL q<>(SB), (RODATA+NOPTR), $96 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0xffffffffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + MOVQ ra6, rb6; \ + SBBQ q<>+48(SB), ra6; \ + MOVQ ra7, rb7; \ + SBBQ q<>+56(SB), ra7; \ + MOVQ ra8, rb8; \ + SBBQ q<>+64(SB), ra8; \ + MOVQ ra9, rb9; \ + SBBQ q<>+72(SB), ra9; \ + MOVQ ra10, rb10; \ + SBBQ q<>+80(SB), ra10; \ + MOVQ ra11, rb11; \ + SBBQ q<>+88(SB), ra11; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + CMOVQCS rb6, ra6; \ + CMOVQCS rb7, ra7; \ + CMOVQCS rb8, ra8; \ + CMOVQCS rb9, ra9; \ + CMOVQCS rb10, ra10; \ + CMOVQCS rb11, ra11; \ + +// add(res, x, y *Element) +TEXT ·add(SB), $80-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ 48(AX), R10 + MOVQ 56(AX), R11 + MOVQ 64(AX), R12 + MOVQ 72(AX), R13 + MOVQ 80(AX), R14 + MOVQ 88(AX), R15 + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + 
ADCQ 32(DX), R8 + ADCQ 40(DX), R9 + ADCQ 48(DX), R10 + ADCQ 56(DX), R11 + ADCQ 64(DX), R12 + ADCQ 72(DX), R13 + ADCQ 80(DX), R14 + ADCQ 88(DX), R15 + + // reduce element(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) using temp registers (AX,DX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,AX,DX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + + MOVQ res+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + MOVQ R10, 48(AX) + MOVQ R11, 56(AX) + MOVQ R12, 64(AX) + MOVQ R13, 72(AX) + MOVQ R14, 80(AX) + MOVQ R15, 88(AX) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), R14 + MOVQ 0(R14), AX + MOVQ 8(R14), DX + MOVQ 16(R14), CX + MOVQ 24(R14), BX + MOVQ 32(R14), SI + MOVQ 40(R14), DI + MOVQ 48(R14), R8 + MOVQ 56(R14), R9 + MOVQ 64(R14), R10 + MOVQ 72(R14), R11 + MOVQ 80(R14), R12 + MOVQ 88(R14), R13 + MOVQ y+16(FP), R14 + SUBQ 0(R14), AX + SBBQ 8(R14), DX + SBBQ 16(R14), CX + SBBQ 24(R14), BX + SBBQ 32(R14), SI + SBBQ 40(R14), DI + SBBQ 48(R14), R8 + SBBQ 56(R14), R9 + SBBQ 64(R14), R10 + SBBQ 72(R14), R11 + SBBQ 80(R14), R12 + SBBQ 88(R14), R13 + JCC l1 + MOVQ $1, R15 + ADDQ R15, AX + MOVQ $0x33c7e63f86840000, R15 + ADCQ R15, DX + MOVQ $0xd0b685e868524ec0, R15 + ADCQ R15, CX + MOVQ $0x4302aa3c258de7de, R15 + ADCQ R15, BX + MOVQ $0xe292cd15edb646a5, R15 + ADCQ R15, SI + MOVQ $0x0a7eb1cb3d06e646, R15 + ADCQ R15, DI + MOVQ $0xeb02c812ea04faaa, R15 + ADCQ R15, R8 + MOVQ $0xccc6ae73c42a46d9, R15 + ADCQ R15, R9 + MOVQ $0xfbf23221455163a6, R15 + ADCQ R15, R10 + MOVQ $0x5c978cd2fac2ce89, R15 + ADCQ R15, R11 + MOVQ $0xe2ac127e1e3568cf, R15 + ADCQ R15, R12 + MOVQ $0x000f76adbb5bb98a, R15 + ADCQ R15, R13 + +l1: + MOVQ res+0(FP), R14 + MOVQ AX, 0(R14) + MOVQ DX, 8(R14) + MOVQ CX, 16(R14) + MOVQ BX, 24(R14) + MOVQ SI, 32(R14) + MOVQ 
DI, 40(R14) + MOVQ R8, 48(R14) + MOVQ R9, 56(R14) + MOVQ R10, 64(R14) + MOVQ R11, 72(R14) + MOVQ R12, 80(R14) + MOVQ R13, 88(R14) + RET + +// double(res, x *Element) +TEXT ·double(SB), $80-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,AX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,AX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + + MOVQ res+0(FP), R15 + MOVQ DX, 0(R15) + MOVQ CX, 8(R15) + MOVQ BX, 16(R15) + MOVQ SI, 24(R15) + MOVQ DI, 32(R15) + MOVQ R8, 40(R15) + MOVQ R9, 48(R15) + MOVQ R10, 56(R15) + MOVQ R11, 64(R15) + MOVQ R12, 72(R15) + MOVQ R13, 80(R15) + MOVQ R14, 88(R15) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), R15 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + ORQ R9, AX + ORQ R10, AX + ORQ R11, AX + ORQ R12, AX + ORQ R13, AX + ORQ R14, AX + TESTQ AX, AX + JEQ l2 + MOVQ $1, AX + SUBQ DX, AX + MOVQ AX, 0(R15) + MOVQ $0x33c7e63f86840000, AX + SBBQ CX, AX + MOVQ AX, 8(R15) + MOVQ $0xd0b685e868524ec0, AX + SBBQ BX, AX + MOVQ AX, 16(R15) + MOVQ $0x4302aa3c258de7de, AX + SBBQ SI, AX + MOVQ AX, 24(R15) + MOVQ $0xe292cd15edb646a5, AX + SBBQ 
DI, AX + MOVQ AX, 32(R15) + MOVQ $0x0a7eb1cb3d06e646, AX + SBBQ R8, AX + MOVQ AX, 40(R15) + MOVQ $0xeb02c812ea04faaa, AX + SBBQ R9, AX + MOVQ AX, 48(R15) + MOVQ $0xccc6ae73c42a46d9, AX + SBBQ R10, AX + MOVQ AX, 56(R15) + MOVQ $0xfbf23221455163a6, AX + SBBQ R11, AX + MOVQ AX, 64(R15) + MOVQ $0x5c978cd2fac2ce89, AX + SBBQ R12, AX + MOVQ AX, 72(R15) + MOVQ $0xe2ac127e1e3568cf, AX + SBBQ R13, AX + MOVQ AX, 80(R15) + MOVQ $0x000f76adbb5bb98a, AX + SBBQ R14, AX + MOVQ AX, 88(R15) + RET + +l2: + MOVQ AX, 0(R15) + MOVQ AX, 8(R15) + MOVQ AX, 16(R15) + MOVQ AX, 24(R15) + MOVQ AX, 32(R15) + MOVQ AX, 40(R15) + MOVQ AX, 48(R15) + MOVQ AX, 56(R15) + MOVQ AX, 64(R15) + MOVQ AX, 72(R15) + MOVQ AX, 80(R15) + MOVQ AX, 88(R15) + RET + +TEXT ·reduce(SB), $88-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), $88-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, 
R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), $88-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + 
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), $184-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // 
reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP),s17-144(SP),s18-152(SP),s19-160(SP),s20-168(SP),s21-176(SP),s22-184(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP),s17-144(SP),s18-152(SP),s19-160(SP),s20-168(SP),s21-176(SP),s22-184(SP)) + + MOVQ DX, s11-96(SP) + MOVQ CX, s12-104(SP) + MOVQ BX, s13-112(SP) + MOVQ SI, s14-120(SP) + MOVQ DI, s15-128(SP) + MOVQ R8, s16-136(SP) + MOVQ R9, s17-144(SP) + MOVQ R10, s18-152(SP) + MOVQ R11, s19-160(SP) + MOVQ R12, s20-168(SP) + MOVQ R13, s21-176(SP) + MOVQ R14, s22-184(SP) + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ s11-96(SP), DX + ADCQ s12-104(SP), CX + ADCQ s13-112(SP), BX + ADCQ s14-120(SP), SI + ADCQ s15-128(SP), DI + ADCQ s16-136(SP), R8 + ADCQ s17-144(SP), R9 + ADCQ s18-152(SP), R10 + ADCQ s19-160(SP), R11 
+ ADCQ s20-168(SP), R12 + ADCQ s21-176(SP), R13 + ADCQ s22-184(SP), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), $88-16 + MOVQ b+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + MOVQ a+0(FP), AX + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + MOVQ DX, R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + MOVQ R9, s5-48(SP) + MOVQ R10, s6-56(SP) + MOVQ R11, s7-64(SP) + 
MOVQ R12, s8-72(SP) + MOVQ R13, s9-80(SP) + MOVQ R14, s10-88(SP) + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + MOVQ b+8(FP), AX + SUBQ 0(AX), DX + SBBQ 8(AX), CX + SBBQ 16(AX), BX + SBBQ 24(AX), SI + SBBQ 32(AX), DI + SBBQ 40(AX), R8 + SBBQ 48(AX), R9 + SBBQ 56(AX), R10 + SBBQ 64(AX), R11 + SBBQ 72(AX), R12 + SBBQ 80(AX), R13 + SBBQ 88(AX), R14 + JCC l3 + MOVQ $1, AX + ADDQ AX, DX + MOVQ $0x33c7e63f86840000, AX + ADCQ AX, CX + MOVQ $0xd0b685e868524ec0, AX + ADCQ AX, BX + MOVQ $0x4302aa3c258de7de, AX + ADCQ AX, SI + MOVQ $0xe292cd15edb646a5, AX + ADCQ AX, DI + MOVQ $0x0a7eb1cb3d06e646, AX + ADCQ AX, R8 + MOVQ $0xeb02c812ea04faaa, AX + ADCQ AX, R9 + MOVQ $0xccc6ae73c42a46d9, AX + ADCQ AX, R10 + MOVQ $0xfbf23221455163a6, AX + ADCQ AX, R11 + MOVQ $0x5c978cd2fac2ce89, AX + ADCQ AX, R12 + MOVQ $0xe2ac127e1e3568cf, AX + ADCQ AX, R13 + MOVQ $0x000f76adbb5bb98a, AX + ADCQ AX, R14 + +l3: + MOVQ b+8(FP), AX + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + MOVQ R15, DX + MOVQ s0-8(SP), CX + MOVQ s1-16(SP), BX + MOVQ s2-24(SP), SI + MOVQ s3-32(SP), DI + MOVQ s4-40(SP), R8 + MOVQ s5-48(SP), R9 + MOVQ s6-56(SP), R10 + MOVQ s7-64(SP), R11 + MOVQ s8-72(SP), R12 + MOVQ s9-80(SP), R13 + MOVQ s10-88(SP), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ a+0(FP), AX + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + 
MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET diff --git a/ecc/bw6-756/fp/element_ops_noasm.go b/ecc/bw6-756/fp/element_ops_noasm.go new file mode 100644 index 000000000..fec628918 --- /dev/null +++ b/ecc/bw6-756/fp/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. 
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bw6-756/fp/element_test.go b/ecc/bw6-756/fp/element_test.go new file mode 100644 index 000000000..bfcc1d7fc --- /dev/null +++ b/ecc/bw6-756/fp/element_test.go @@ -0,0 +1,2777 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 11214533042317621956, + 4418601975293183768, + 2233550636059863627, + 13772400071271951950, + 13010224617750716256, + 15582310590478290871, + 6301429202206019695, + 15624904615961126890, + 14411832617204527559, + 10495912060283172777, + 8432856701560321958, + 4166778949326216, + } + benchResElement.SetOne() + b.ResetTimer() + 
for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 11214533042317621956, + 4418601975293183768, + 2233550636059863627, + 13772400071271951950, + 13010224617750716256, + 15582310590478290871, + 6301429202206019695, + 15624904615961126890, + 14411832617204527559, + 10495912060283172777, + 8432856701560321958, + 4166778949326216, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 20 + nbFuzz = 100 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[11]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = 
append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[11]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + +func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + 
b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 
5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + 
Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && 
a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic 
impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric 
Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + 
parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000", 16) + const sqrtExponentElement = "1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match 
Exp(7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := 
ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c 
:= a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + 
s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[11] > qElement[11] { + return true + } + if z[11] < qElement[11] { + return false + } + + if z[10] > qElement[10] { + return true + } + if z[10] < qElement[10] { + return false + } + + if z[9] > qElement[9] { + return true + } + if z[9] < qElement[9] { + return false + } + + if z[8] > qElement[8] { + return true + } + if z[8] < qElement[8] { + return false + } + + if z[7] > qElement[7] { + return true + } + if z[7] < qElement[7] { + return false + } + + if z[6] > qElement[6] { + return true + } + if z[6] < qElement[6] { + return false + } + + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g 
testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[11] != ^uint64(0) { + g.element[11] %= (qElement[11] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[11] != ^uint64(0) { + g.element[11] %= (qElement[11] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[11] != ^uint64(0) { + g[11] %= (qElement[11] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[11] != ^uint64(0) { + g[11] %= (qElement[11] + 
1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], carry = bits.Add64(a[3], qElement[3], carry) + a[4], carry = bits.Add64(a[4], qElement[4], carry) + a[5], carry = bits.Add64(a[5], qElement[5], carry) + a[6], carry = bits.Add64(a[6], qElement[6], carry) + a[7], carry = bits.Add64(a[7], qElement[7], carry) + a[8], carry = bits.Add64(a[8], qElement[8], carry) + a[9], carry = bits.Add64(a[9], qElement[9], carry) + a[10], carry = bits.Add64(a[10], qElement[10], carry) + a[11], _ = bits.Add64(a[11], qElement[11], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + inversionCorrectionFactorWord6, + inversionCorrectionFactorWord7, + inversionCorrectionFactorWord8, + inversionCorrectionFactorWord9, + inversionCorrectionFactorWord10, + inversionCorrectionFactorWord11, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + 
if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. +func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + inversionCorrectionFactorWord6, + inversionCorrectionFactorWord7, + inversionCorrectionFactorWord8, + inversionCorrectionFactorWord9, + inversionCorrectionFactorWord10, + inversionCorrectionFactorWord11, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + 
x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } +} + +//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen +func TestUpdateFactorSubtraction(t *testing.T) { + for i := 0; i < 1000; i++ { + + f0, g0 := randomizeUpdateFactors() + f1, g1 := randomizeUpdateFactors() + + for f0-f1 > 1<<31 || f0-f1 <= -1<<31 { + f1 /= 2 + } + + for g0-g1 > 1<<31 || g0-g1 <= -1<<31 { + g1 /= 2 + } + + c0 := updateFactorsCompose(f0, g0) + c1 := updateFactorsCompose(f1, g1) + + cRes := c0 - c1 + fRes, gRes := updateFactorsDecompose(cRes) + + if fRes != f0-f1 || gRes != g0-g1 { + t.Error(i) + } + } +} + +func TestUpdateFactorsDouble(t *testing.T) { + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f > 1<<30 || f < (-1<<31+1)/2 { + f /= 2 + if g <= 1<<29 && g >= (-1<<31+1)/4 { + g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g + } + } + + if g > 1<<30 || g < (-1<<31+1)/2 { + g /= 2 + + if f <= 1<<29 && f >= (-1<<31+1)/4 { + f *= 2 //f was kept small on g's account. 
Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func 
TestUpdateFactorsRandomization(t *testing.T) { + var maxLen int + + //t.Log("|f| + |g| is not to exceed", 1 << 31) + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + lf, lg := abs64T32(f), abs64T32(g) + absSum := lf + lg + if absSum >= 1<<31 { + + if absSum == 1<<31 { + maxLen++ + } else { + t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum) + } + } + } + + if maxLen == 0 { + t.Error("max len not observed") + } else { + t.Log(maxLen, "maxLens observed") + } +} + +func randomizeUpdateFactor(absLimit uint32) int64 { + const maxSizeLikelihood = 10 + maxSize := mrand.Intn(maxSizeLikelihood) + + absLimit64 := int64(absLimit) + var f int64 + switch maxSize { + case 0: + f = absLimit64 + case 1: + f = -absLimit64 + default: + f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64 + } + + if f > 1<<31 { + return 1 << 31 + } else if f < -1<<31+1 { + return -1<<31 + 1 + } + + return f +} + +func abs64T32(f int64) uint32 { + if f >= 1<<32 || f < -1<<32 { + panic("f out of range") + } + + if f < 0 { + return uint32(-f) + } + return uint32(f) +} + +func randomizeUpdateFactors() (int64, int64) { + var f [2]int64 + b := mrand.Int() % 2 + + f[b] = randomizeUpdateFactor(1 << 31) + + //As per the paper, |f| + |g| \le 2³¹. 
+ f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b])) + + //Patching another edge case + if f[0]+f[1] == -1<<31 { + b = mrand.Int() % 2 + f[b]++ + } + + return f[0], f[1] +} + +func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) { + + var p1 big.Int + x.ToBigInt(&p1) + p1.Mul(&p1, big.NewInt(xC)) + + var p2 big.Int + y.ToBigInt(&p2) + p2.Mul(&p2, big.NewInt(yC)) + + p1.Add(&p1, &p2) + p1.Mod(&p1, Modulus()) + montReduce(&p1, &p1) + + var z Element + z.linearCombSosSigned(x, xC, y, yC) + z.assertMatchVeryBigInt(t, 0, &p1) +} + +func testBigNumWMul(t *testing.T, a *Element, c int64) { + var aHi uint64 + var aTimes Element + aHi = aTimes.mulWRegular(a, c) + + assertMulProduct(t, a, c, &aTimes, aHi) +} + +func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) { + var res Element + var xInt big.Int + var resInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + res.montReduceSigned(x, xHi) + montReduce(&resInt, &xInt) + res.assertMatchVeryBigInt(t, 0, &resInt) +} + +func updateFactorsCompose(f int64, g int64) int64 { + return f + g<<32 +} + +var rInv big.Int + +func montReduce(res *big.Int, x *big.Int) { + if rInv.BitLen() == 0 { // initialization + rInv.SetUint64(1) + rInv.Lsh(&rInv, Limbs*64) + rInv.ModInverse(&rInv, Modulus()) + } + res.Mul(x, &rInv) + res.Mod(res, Modulus()) +} + +func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) { + z.ToBigInt(i) + var upperWord big.Int + upperWord.SetUint64(xHi) + upperWord.Lsh(&upperWord, Limbs*64) + i.Add(&upperWord, i) +} + +func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) { + z.toVeryBigIntUnsigned(i, xHi) + if signBitSelector&xHi != 0 { + twosCompModulus := big.NewInt(1) + twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64) + i.Sub(i, twosCompModulus) + } +} + +func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int { + var xInt big.Int + x.ToBigInt(&xInt) + + xInt.Mul(&xInt, big.NewInt(c)) + + result.assertMatchVeryBigInt(t, 
resultHi, &xInt) + return xInt +} + +func assertMatch(t *testing.T, w []big.Word, a uint64, index int) { + + var wI big.Word + + if index < len(w) { + wI = w[index] + } + + const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize) + + a = a >> ((index * bits.UintSize) % 64) + a &= filter + + if uint64(wI) != a { + t.Error("Bignum mismatch: disagreement on word", index) + } +} + +func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) { + + var modulus big.Int + var aIntMod big.Int + modulus.SetInt64(1) + modulus.Lsh(&modulus, (Limbs+1)*64) + aIntMod.Mod(aInt, &modulus) + + words := aIntMod.Bits() + + const steps = 64 / bits.UintSize + for i := 0; i < Limbs*steps; i++ { + assertMatch(t, words, z[i/steps], i) + } + + for i := 0; i < steps; i++ { + assertMatch(t, words, aHi, Limbs*steps+i) + } +} + +func approximateRef(x *Element) uint64 { + + var asInt big.Int + x.ToBigInt(&asInt) + n := x.BitLen() + + if n <= 64 { + return asInt.Uint64() + } + + modulus := big.NewInt(1 << 31) + var lo big.Int + lo.Mod(&asInt, modulus) + + modulus.Lsh(modulus, uint(n-64)) + var hi big.Int + hi.Div(&asInt, modulus) + hi.Lsh(&hi, 31) + + hi.Add(&hi, &lo) + return hi.Uint64() +} diff --git a/ecc/bw6-756/fr/arith.go b/ecc/bw6-756/fr/arith.go new file mode 100644 index 000000000..83c9fd9ef --- /dev/null +++ b/ecc/bw6-756/fr/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "math/bits" +) + +// madd0 hi = a*b + c (discards lo bits) +func madd0(a, b, c uint64) (hi uint64) { + var carry, lo uint64 + hi, lo = bits.Mul64(a, b) + _, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd1 hi, lo = a*b + c +func madd1(a, b, c uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd2 hi, lo = a*b + c + d +func madd2(a, b, c, d uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, e, carry) + return +} diff --git a/ecc/bw6-756/fr/asm.go b/ecc/bw6-756/fr/asm.go new file mode 100644 index 000000000..8241357c4 --- /dev/null +++ b/ecc/bw6-756/fr/asm.go @@ -0,0 +1,24 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bw6-756/fr/asm_noadx.go b/ecc/bw6-756/fr/asm_noadx.go new file mode 100644 index 000000000..221beab93 --- /dev/null +++ b/ecc/bw6-756/fr/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bw6-756/fr/doc.go b/ecc/bw6-756/fr/doc.go new file mode 100644 index 000000000..215d19b3d --- /dev/null +++ b/ecc/bw6-756/fr/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fr contains field arithmetic operations for modulus = 0x3eeb04...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. +// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [6]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f76a822c00009948a20000000001 // base 16 +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 // base 10 +package fr diff --git a/ecc/bw6-756/fr/element.go b/ecc/bw6-756/fr/element.go new file mode 100644 index 000000000..afbec34c8 --- /dev/null +++ b/ecc/bw6-756/fr/element.go @@ -0,0 +1,1720 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 6 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +type Element [6]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 6 + +// Bits number bits needed to represent Element +const Bits = 378 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 11045256207009841153 +const qElementWord1 uint64 = 14886639130118979584 +const qElementWord2 uint64 = 10956628289047010687 +const qElementWord3 uint64 = 9513184293603517222 +const qElementWord4 uint64 = 6038022134869067682 +const qElementWord5 uint64 = 283357621510263184 + +var qElement = Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, +} + +// Used for Montgomery reduction. 
(qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 11045256207009841151 + +// rSquare +var rSquare = Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + z[4] = x[4] + z[5] = x[5] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + 
case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fr.Element from type " + reflect.TypeOf(i1).String()) + } +} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + z[4] = 0 + z[5] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 1481365419032838079 + z[1] = 10045892448872562649 + z[2] = 7242180086616818316 + z[3] = 8832319421896135475 + z[4] = 13356930855120736188 + z[5] = 28498675542444634 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 6 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[5] == x[5]) && (z[4] == x[4]) && (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[5] > _x[5] { + return 1 + } else if _z[5] < _x[5] { + return -1 + } + if _z[4] > _x[4] { + return 1 + } else if _z[4] < _x[4] { + return -1 + } + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 5522628103504920577, 0) + _, b = bits.Sub64(_z[1], 16666691601914265600, b) + _, b = bits.Sub64(_z[2], 5478314144523505343, b) + _, b = bits.Sub64(_z[3], 4756592146801758611, b) + _, b = bits.Sub64(_z[4], 3019011067434533841, b) + _, b = bits.Sub64(_z[5], 141678810755131592, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [48]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[4] = binary.BigEndian.Uint64(bytes[32:40]) + z[5] = binary.BigEndian.Uint64(bytes[40:48]) + z[5] %= 
283357621510263184 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + + return z, nil +} + +// One returns 1 (in montgommery form) +func One() Element { + var one Element + one.SetOne() + return one +} + +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 + + // z = z + q + z[0], carry = bits.Add64(z[0], 11045256207009841153, 0) + z[1], carry = bits.Add64(z[1], 14886639130118979584, carry) + z[2], carry = bits.Add64(z[2], 10956628289047010687, carry) + z[3], carry = bits.Add64(z[3], 9513184293603517222, carry) + z[4], carry = bits.Add64(z[4], 6038022134869067682, carry) + z[5], _ = bits.Add64(z[5], 283357621510263184, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] = z[3]>>1 | z[4]<<63 + z[4] = z[4]>>1 | z[5]<<63 + z[5] >>= 1 + +} + +// API with assembly impl + +// Mul z = x * y mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Mul(x, y *Element) *Element { + mul(z, x, y) + return z +} + +// Square z = x * x mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Square(x *Element) *Element { + mul(z, x, x) + return z +} + +// FromMont converts z in place (i.e. 
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func (z *Element) FromMont() *Element { + fromMont(z) + return z +} + +// Add z = x + y mod q +func (z *Element) Add(x, y *Element) *Element { + add(z, x, y) + return z +} + +// Double z = x + x mod q, aka Lsh 1 +func (z *Element) Double(x *Element) *Element { + double(z, x) + return z +} + +// Sub z = x - y mod q +func (z *Element) Sub(x, y *Element) *Element { + sub(z, x, y) + return z +} + +// Neg z = q - x +func (z *Element) Neg(x *Element) *Element { + neg(z, x) + return z +} + +// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms + +func _mulGeneric(z, x, y *Element) { + + var t [6]uint64 + var c [3]uint64 + { + // round 0 + v := x[0] + c[1], c[0] = bits.Mul64(v, y[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd1(v, y[1], c[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd1(v, y[2], c[1]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd1(v, y[3], c[1]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd1(v, y[4], c[1]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd1(v, y[5], c[1]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 1 + v := x[1] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, 
c[0], c[2], c[1]) + } + { + // round 2 + v := x[2] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 3 + v := x[3] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 4 + v := x[4] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 5 + v 
:= x[5] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], z[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], z[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], z[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], z[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + z[5], z[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [6]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 14886639130118979584, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 10956628289047010687, c2, c0) + c1, c0 = madd1(y, x[3], c1) + c2, t[2] = madd2(m, 9513184293603517222, c2, c0) + c1, c0 = madd1(y, x[4], c1) + c2, t[3] = madd2(m, 6038022134869067682, c2, c0) + c1, c0 = madd1(y, x[5], c1) + t[5], t[4] = madd3(m, 283357621510263184, c0, 
c2, c1) + } + { + // round 1 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 2 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 3 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 4 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 5 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, z[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, z[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, z[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, z[3] = madd2(m, 6038022134869067682, c2, t[4]) + z[5], z[4] = madd2(m, 283357621510263184, t[5], c2) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 
6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _fromMontGeneric(z *Element) { + // the following lines implement z = z * 1 + // with a modified CIOS montgomery multiplication + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 
11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _addGeneric(z, x, y *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], y[0], 0) + z[1], carry = bits.Add64(x[1], y[1], carry) + z[2], carry = 
bits.Add64(x[2], y[2], carry) + z[3], carry = bits.Add64(x[3], y[3], carry) + z[4], carry = bits.Add64(x[4], y[4], carry) + z[5], _ = bits.Add64(x[5], y[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _doubleGeneric(z, x *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], x[0], 0) + z[1], carry = bits.Add64(x[1], x[1], carry) + z[2], carry = bits.Add64(x[2], x[2], carry) + z[3], carry = bits.Add64(x[3], x[3], carry) + z[4], carry = bits.Add64(x[4], x[4], carry) + z[5], _ = bits.Add64(x[5], x[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = 
bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _subGeneric(z, x, y *Element) { + var b uint64 + z[0], b = bits.Sub64(x[0], y[0], 0) + z[1], b = bits.Sub64(x[1], y[1], b) + z[2], b = bits.Sub64(x[2], y[2], b) + z[3], b = bits.Sub64(x[3], y[3], b) + z[4], b = bits.Sub64(x[4], y[4], b) + z[5], b = bits.Sub64(x[5], y[5], b) + if b != 0 { + var c uint64 + z[0], c = bits.Add64(z[0], 11045256207009841153, 0) + z[1], c = bits.Add64(z[1], 14886639130118979584, c) + z[2], c = bits.Add64(z[2], 10956628289047010687, c) + z[3], c = bits.Add64(z[3], 9513184293603517222, c) + z[4], c = bits.Add64(z[4], 6038022134869067682, c) + z[5], _ = bits.Add64(z[5], 283357621510263184, c) + } +} + +func _negGeneric(z, x *Element) { + if x.IsZero() { + z.SetZero() + return + } + var borrow uint64 + z[0], borrow = bits.Sub64(11045256207009841153, x[0], 0) + z[1], borrow = bits.Sub64(14886639130118979584, x[1], borrow) + z[2], borrow = bits.Sub64(10956628289047010687, x[2], borrow) + z[3], borrow = bits.Sub64(9513184293603517222, x[3], borrow) + z[4], borrow = bits.Sub64(6038022134869067682, x[4], borrow) + z[5], _ = bits.Sub64(283357621510263184, x[5], borrow) +} + +func _reduceGeneric(z *Element) { + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func mulByConstant(z 
*Element, c uint8) { + switch c { + case 0: + z.SetZero() + return + case 1: + return + case 2: + z.Double(z) + return + case 3: + _z := *z + z.Double(z).Add(z, &_z) + case 5: + _z := *z + z.Double(z).Double(z).Add(z, &_z) + default: + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[5] != 0 { + return 320 + bits.Len64(z[5]) + } + if z[4] != 0 { + return 256 + bits.Len64(z[4]) + } + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + +// Exp z = x^exponent mod q +func (z *Element) Exp(x Element, exponent *big.Int) *Element { + var bZero big.Int + if exponent.Cmp(&bZero) == 0 { + return z.SetOne() + } + + z.Set(&x) + + for i := exponent.BitLen() - 2; i >= 0; i-- { + z.Square(z) + if exponent.Bit(i) == 1 { + z.Mul(z, &x) + } + } + + return z +} + +// ToMont converts z to Montgomery form +// sets and returns z = z * r² +func (z *Element) ToMont() *Element { + return z.Mul(z, &rSquare) +} + +// ToRegular returns z in regular form (doesn't mutate z) +func (z Element) ToRegular() 
Element { + return *z.FromMont() +} + +// String returns the decimal representation of z as generated by +// z.Text(10). +func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[40:48], z[0]) + binary.BigEndian.PutUint64(b[32:40], z[1]) + binary.BigEndian.PutUint64(b[24:32], z[2]) + binary.BigEndian.PutUint64(b[16:24], z[3]) + binary.BigEndian.PutUint64(b[8:16], z[4]) + binary.BigEndian.PutUint64(b[0:8], z[5]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. 
+func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[40:48], _z[0]) + binary.BigEndian.PutUint64(res[32:40], _z[1]) + binary.BigEndian.PutUint64(res[24:32], _z[2]) + binary.BigEndian.PutUint64(res[16:24], _z[3]) + binary.BigEndian.PutUint64(res[8:16], _z[4]) + binary.BigEndian.PutUint64(res[0:8], _z[5]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. +func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix 
determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil +} + +// Legendre returns the Legendre symbol of z (either +1, -1, or 0.) +func (z *Element) Legendre() int { + var l Element + // z^((q-1)/2) + l.expByLegendreExp(*z) + + if l.IsZero() { + return 0 + } + + // if l == 1 + if (l[5] == 28498675542444634) && (l[4] == 13356930855120736188) && (l[3] == 8832319421896135475) && (l[2] == 7242180086616818316) && (l[1] == 10045892448872562649) && (l[0] == 1481365419032838079) { + return 1 + } + return -1 +} + +// Sqrt z = √x mod q +// if the square root doesn't exist (x is not a square mod q) +// Sqrt leaves z unchanged and returns nil +func (z *Element) Sqrt(x *Element) *Element { + // q ≡ 1 (mod 4) + // see modSqrtTonelliShanks in math/big/int.go + // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + + var y, b, t, w Element + // w = x^((s-1)/2)) + w.expBySqrtExp(*x) + + // y = x^((s+1)/2)) = w * x + y.Mul(x, &w) + + // b = x^s = w * w * x = y * x + b.Mul(&w, &y) + + // g = nonResidue ^ s + var g = Element{ + 15655215628902554004, + 15894127656167592378, + 9702012166408397168, + 12335982559306940759, + 1313802173610541430, + 81629743607937133, + } + r := uint64(41) + + // compute legendre symbol + // t = x^((q-1)/2) = r-1 squaring of x^s + t = b + for i := uint64(0); i < r-1; i++ { + t.Square(&t) + } + if t.IsZero() { + return z.SetZero() + } + if !((t[5] 
== 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) { + // t != 1, we don't have a square root + return nil + } + for { + var m uint64 + t = b + + // for t != 1 + for !((t[5] == 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) { + t.Square(&t) + m++ + } + + if m == 0 { + return z.Set(&y) + } + // t = g^(2^(r-m-1)) mod q + ge := int(r - m - 1) + t = g + for ge > 0 { + t.Square(&t) + ge-- + } + + g.Square(&t) + y.Mul(&y, &t) + b.Mul(&b, &g) + r = m + } +} + +func max(a int, b int) int { + if a > b { + return a + } + return b +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 851295657643717122 +const inversionCorrectionFactorWord1 = 10857859049187504913 +const inversionCorrectionFactorWord2 = 7148604188520083019 +const inversionCorrectionFactorWord3 = 1138623559447261654 +const inversionCorrectionFactorWord4 = 1203095380280779597 +const inversionCorrectionFactorWord5 = 148579538565968037 + +const invIterationsN = 26 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x 
*Element) *Element { + if x.IsZero() { + z.SetZero() + return z + } + + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + } // b := q + + u := Element{1} + + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 + + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still + } + + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) + a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << 
approxHighBitsN) + a[5] = (a[5] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g1 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) + b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) + b[5] = (b[5] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) + + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost
and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 + + m := x[0] * qInvNegLsw + + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + C, t[4] = madd2(m, qElementWord4, x[4], C) + C, t[5] = madd2(m, qElementWord5, x[5], C) + + // the high word of m * qElement[5] is at most 62 bits + // x[5] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[6] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, 
t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 3 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 4 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 5 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + C, z[2] = madd2(m, qElementWord3, t[i+3], C) + C, z[3] = madd2(m, qElementWord4, t[i+4], C) + z[5], z[4] = madd2(m, qElementWord5, t[i+5], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] 
== 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + z[4], b = bits.Sub64(z[4], 0, b) + z[5], b = bits.Sub64(z[5], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[5] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], b = bits.Add64(z[3], qElementWord3, b) + z[4], b = bits.Add64(z[4], qElementWord4, b) + z[5], _ = bits.Add64(neg1, qElementWord5, b) + } + } +} + +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} + +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 + + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + z[4], b = bits.Sub64(0, x[4], b) + z[5], b = bits.Sub64(0, x[5], b) + xHi, _ = bits.Sub64(0, xHi, b) + + return xHi +} + +// regular multiplication by one word regular (non montgomery) +// Fewer additions than the branch-free for positive y. 
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { + + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) + + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) + c, z[4] = madd1(x[4], w, c) + c, z[5] = madd1(x[5], w, c) + + if y < 0 { + c = z.neg(z, c) + } + + return c +} + +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { + + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w + + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 4 - 1 + + h[curI], s[curI] = bits.Mul64(x[4], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 5 % 2 + const prevI 
= 1 - curI + const iMinusOne = 5 - 1 + + h[curI], s[curI] = bits.Mul64(x[5], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + { + const curI = 6 % 2 + const prevI = 1 - curI + const iMinusOne = 5 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c + } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + z[4], carry = bits.Add64(z[4], yTimes[4], carry) + z[5], carry = bits.Add64(z[5], yTimes[5], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + + return yHi +} diff --git a/ecc/bw6-756/fr/element_exp.go b/ecc/bw6-756/fr/element_exp.go new file mode 100644 index 000000000..7ac8671aa --- /dev/null +++ b/ecc/bw6-756/fr/element_exp.go @@ -0,0 +1,1040 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// expBySqrtExp is equivalent to z.Exp(x, fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) << 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // return ((_101001 + i386) << 6 + _101) << 3 + // + // 
Operations: 330 squares 67 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = 
x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; 
s++ { + t15.Square(t15) + } + + // Step 150: t15 = x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } + + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 
303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 397: z = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 + for s := 0; s < 3; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) 
<< 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // i399 = ((_101001 + i386) << 6 + _101) << 4 + 1 + // return i399 << 40 + // + // Operations: 371 squares 68 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + 
for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 150: t15 = 
x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } 
+ + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // 
Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 398: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca450 + for s := 0; s < 4; s++ { + z.Square(z) + } + + // Step 399: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca451 + z.Mul(&x, z) + + // Step 439: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000 + for s := 0; s < 40; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bw6-756/fr/element_fuzz.go b/ecc/bw6-756/fr/element_fuzz.go new file mode 100644 index 000000000..9d00610dd --- /dev/null +++ b/ecc/bw6-756/fr/element_fuzz.go @@ -0,0 +1,152 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[5] %= qElement[5] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + z[4], b = bits.Sub64(z[4], qElement[4], b) + z[5], b = bits.Sub64(z[5], qElement[5], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + 
if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/ecc/bw6-756/fr/element_mul_adx_amd64.s b/ecc/bw6-756/fr/element_mul_adx_amd64.s new file mode 100644 index 000000000..a6f902c36 --- /dev/null +++ b/ecc/bw6-756/fr/element_mul_adx_amd64.s @@ -0,0 +1,836 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), NOSPLIT, $0-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 
32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + 
ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + 
MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + 
// (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp 
registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +TEXT ·fromMont(SB), NOSPLIT, $0-8 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ 
DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + 
ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET diff --git a/ecc/bw6-756/fr/element_mul_amd64.s b/ecc/bw6-756/fr/element_mul_amd64.s new file mode 100644 index 000000000..171a75360 --- /dev/null +++ b/ecc/bw6-756/fr/element_mul_amd64.s @@ -0,0 +1,858 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $24-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + 
A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + 
ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + 
// (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), 
DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ 
q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $8-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 
+ ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ 
R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bw6-756/fr/element_ops_amd64.go b/ecc/bw6-756/fr/element_ops_amd64.go new file mode 100644 index 000000000..78022b3e6 --- /dev/null +++ b/ecc/bw6-756/fr/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bw6-756/fr/element_ops_amd64.s b/ecc/bw6-756/fr/element_ops_amd64.s new file mode 100644 index 000000000..97da07d77 --- /dev/null +++ b/ecc/bw6-756/fr/element_ops_amd64.s @@ -0,0 +1,452 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// add(res, x, y *Element) +TEXT ·add(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 40(DX), R9 + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ res+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + XORQ R9, R9 + MOVQ x+8(FP), R8 + MOVQ 0(R8), AX + MOVQ 8(R8), DX + MOVQ 16(R8), CX + MOVQ 24(R8), BX + MOVQ 32(R8), SI + MOVQ 40(R8), DI + MOVQ y+16(FP), R8 + SUBQ 0(R8), AX + SBBQ 8(R8), DX + SBBQ 16(R8), CX + SBBQ 24(R8), BX + SBBQ 32(R8), SI + SBBQ 40(R8), DI + MOVQ $0x9948a20000000001, R10 + MOVQ $0xce97f76a822c0000, R11 + MOVQ $0x980dc360d0a49d7f, R12 + MOVQ $0x84059eb647102326, R13 + MOVQ $0x53cb5d240ed107a2, R14 + MOVQ 
$0x03eeb0416684d190, R15 + CMOVQCC R9, R10 + CMOVQCC R9, R11 + CMOVQCC R9, R12 + CMOVQCC R9, R13 + CMOVQCC R9, R14 + CMOVQCC R9, R15 + ADDQ R10, AX + ADCQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + ADCQ R15, DI + MOVQ res+0(FP), R8 + MOVQ AX, 0(R8) + MOVQ DX, 8(R8) + MOVQ CX, 16(R8) + MOVQ BX, 24(R8) + MOVQ SI, 32(R8) + MOVQ DI, 40(R8) + RET + +// double(res, x *Element) +TEXT ·double(SB), NOSPLIT, $0-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ res+0(FP), R15 + MOVQ DX, 0(R15) + MOVQ CX, 8(R15) + MOVQ BX, 16(R15) + MOVQ SI, 24(R15) + MOVQ DI, 32(R15) + MOVQ R8, 40(R15) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), R9 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + TESTQ AX, AX + JEQ l1 + MOVQ $0x9948a20000000001, R10 + SUBQ DX, R10 + MOVQ R10, 0(R9) + MOVQ $0xce97f76a822c0000, R10 + SBBQ CX, R10 + MOVQ R10, 8(R9) + MOVQ $0x980dc360d0a49d7f, R10 + SBBQ BX, R10 + MOVQ R10, 16(R9) + MOVQ $0x84059eb647102326, R10 + SBBQ SI, R10 + MOVQ R10, 24(R9) + MOVQ $0x53cb5d240ed107a2, R10 + SBBQ DI, R10 + MOVQ R10, 32(R9) + MOVQ $0x03eeb0416684d190, R10 + SBBQ R8, R10 + MOVQ R10, 40(R9) + RET + +l1: + MOVQ AX, 0(R9) + MOVQ AX, 8(R9) + MOVQ AX, 16(R9) + MOVQ AX, 24(R9) + MOVQ AX, 32(R9) + MOVQ AX, 40(R9) + RET + +TEXT ·reduce(SB), NOSPLIT, $0-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + 
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12) + REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), $40-8 + MOVQ x+0(FP), AX + MOVQ 
0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + + MOVQ DX, R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ R15, DX + ADCQ s0-8(SP), CX + ADCQ s1-16(SP), BX + ADCQ s2-24(SP), SI + ADCQ s3-32(SP), DI + ADCQ s4-40(SP), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), $48-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ CX, R10 + MOVQ BX, R11 + MOVQ SI, R12 + MOVQ DI, R13 + MOVQ R8, R14 + MOVQ R9, R15 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 
40(DX), R9 + SUBQ 0(DX), R10 + SBBQ 8(DX), R11 + SBBQ 16(DX), R12 + SBBQ 24(DX), R13 + SBBQ 32(DX), R14 + SBBQ 40(DX), R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + MOVQ R9, s5-48(SP) + MOVQ $0x9948a20000000001, CX + MOVQ $0xce97f76a822c0000, BX + MOVQ $0x980dc360d0a49d7f, SI + MOVQ $0x84059eb647102326, DI + MOVQ $0x53cb5d240ed107a2, R8 + MOVQ $0x03eeb0416684d190, R9 + CMOVQCC AX, CX + CMOVQCC AX, BX + CMOVQCC AX, SI + CMOVQCC AX, DI + CMOVQCC AX, R8 + CMOVQCC AX, R9 + ADDQ CX, R10 + ADCQ BX, R11 + ADCQ SI, R12 + ADCQ DI, R13 + ADCQ R8, R14 + ADCQ R9, R15 + MOVQ s0-8(SP), CX + MOVQ s1-16(SP), BX + MOVQ s2-24(SP), SI + MOVQ s3-32(SP), DI + MOVQ s4-40(SP), R8 + MOVQ s5-48(SP), R9 + MOVQ R10, 0(DX) + MOVQ R11, 8(DX) + MOVQ R12, 16(DX) + MOVQ R13, 24(DX) + MOVQ R14, 32(DX) + MOVQ R15, 40(DX) + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET diff --git a/ecc/bw6-756/fr/element_ops_noasm.go b/ecc/bw6-756/fr/element_ops_noasm.go new file mode 100644 index 000000000..ec1fac18d --- /dev/null +++ b/ecc/bw6-756/fr/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bw6-756/fr/element_test.go b/ecc/bw6-756/fr/element_test.go new file mode 100644 index 000000000..4b3fb2975 --- /dev/null +++ b/ecc/bw6-756/fr/element_test.go @@ -0,0 +1,2681 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement.SetOne() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 
13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 200 + nbFuzz = 1000 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[5]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[5]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + 
+func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := 
gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a 
testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx 
= false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, 
&b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + 
properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000", 16) + const sqrtExponentElement = "fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match Exp(1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + 
c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with 
int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := 
a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + 
err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + 
var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], carry = bits.Add64(a[3], qElement[3], carry) + a[4], carry = bits.Add64(a[4], qElement[4], carry) + a[5], _ = bits.Add64(a[5], qElement[5], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. 
Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. 
+func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } 
+} + +//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen +func TestUpdateFactorSubtraction(t *testing.T) { + for i := 0; i < 1000; i++ { + + f0, g0 := randomizeUpdateFactors() + f1, g1 := randomizeUpdateFactors() + + for f0-f1 > 1<<31 || f0-f1 <= -1<<31 { + f1 /= 2 + } + + for g0-g1 > 1<<31 || g0-g1 <= -1<<31 { + g1 /= 2 + } + + c0 := updateFactorsCompose(f0, g0) + c1 := updateFactorsCompose(f1, g1) + + cRes := c0 - c1 + fRes, gRes := updateFactorsDecompose(cRes) + + if fRes != f0-f1 || gRes != g0-g1 { + t.Error(i) + } + } +} + +func TestUpdateFactorsDouble(t *testing.T) { + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f > 1<<30 || f < (-1<<31+1)/2 { + f /= 2 + if g <= 1<<29 && g >= (-1<<31+1)/4 { + g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g + } + } + + if g > 1<<30 || g < (-1<<31+1)/2 { + g /= 2 + + if f <= 1<<29 && f >= (-1<<31+1)/4 { + f *= 2 //f was kept small on g's account. 
Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func 
TestUpdateFactorsRandomization(t *testing.T) { + var maxLen int + + //t.Log("|f| + |g| is not to exceed", 1 << 31) + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + lf, lg := abs64T32(f), abs64T32(g) + absSum := lf + lg + if absSum >= 1<<31 { + + if absSum == 1<<31 { + maxLen++ + } else { + t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum) + } + } + } + + if maxLen == 0 { + t.Error("max len not observed") + } else { + t.Log(maxLen, "maxLens observed") + } +} + +func randomizeUpdateFactor(absLimit uint32) int64 { + const maxSizeLikelihood = 10 + maxSize := mrand.Intn(maxSizeLikelihood) + + absLimit64 := int64(absLimit) + var f int64 + switch maxSize { + case 0: + f = absLimit64 + case 1: + f = -absLimit64 + default: + f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64 + } + + if f > 1<<31 { + return 1 << 31 + } else if f < -1<<31+1 { + return -1<<31 + 1 + } + + return f +} + +func abs64T32(f int64) uint32 { + if f >= 1<<32 || f < -1<<32 { + panic("f out of range") + } + + if f < 0 { + return uint32(-f) + } + return uint32(f) +} + +func randomizeUpdateFactors() (int64, int64) { + var f [2]int64 + b := mrand.Int() % 2 + + f[b] = randomizeUpdateFactor(1 << 31) + + //As per the paper, |f| + |g| \le 2³¹. 
+ f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b])) + + //Patching another edge case + if f[0]+f[1] == -1<<31 { + b = mrand.Int() % 2 + f[b]++ + } + + return f[0], f[1] +} + +func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) { + + var p1 big.Int + x.ToBigInt(&p1) + p1.Mul(&p1, big.NewInt(xC)) + + var p2 big.Int + y.ToBigInt(&p2) + p2.Mul(&p2, big.NewInt(yC)) + + p1.Add(&p1, &p2) + p1.Mod(&p1, Modulus()) + montReduce(&p1, &p1) + + var z Element + z.linearCombSosSigned(x, xC, y, yC) + z.assertMatchVeryBigInt(t, 0, &p1) +} + +func testBigNumWMul(t *testing.T, a *Element, c int64) { + var aHi uint64 + var aTimes Element + aHi = aTimes.mulWRegular(a, c) + + assertMulProduct(t, a, c, &aTimes, aHi) +} + +func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) { + var res Element + var xInt big.Int + var resInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + res.montReduceSigned(x, xHi) + montReduce(&resInt, &xInt) + res.assertMatchVeryBigInt(t, 0, &resInt) +} + +func updateFactorsCompose(f int64, g int64) int64 { + return f + g<<32 +} + +var rInv big.Int + +func montReduce(res *big.Int, x *big.Int) { + if rInv.BitLen() == 0 { // initialization + rInv.SetUint64(1) + rInv.Lsh(&rInv, Limbs*64) + rInv.ModInverse(&rInv, Modulus()) + } + res.Mul(x, &rInv) + res.Mod(res, Modulus()) +} + +func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) { + z.ToBigInt(i) + var upperWord big.Int + upperWord.SetUint64(xHi) + upperWord.Lsh(&upperWord, Limbs*64) + i.Add(&upperWord, i) +} + +func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) { + z.toVeryBigIntUnsigned(i, xHi) + if signBitSelector&xHi != 0 { + twosCompModulus := big.NewInt(1) + twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64) + i.Sub(i, twosCompModulus) + } +} + +func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int { + var xInt big.Int + x.ToBigInt(&xInt) + + xInt.Mul(&xInt, big.NewInt(c)) + + result.assertMatchVeryBigInt(t, 
resultHi, &xInt) + return xInt +} + +func assertMatch(t *testing.T, w []big.Word, a uint64, index int) { + + var wI big.Word + + if index < len(w) { + wI = w[index] + } + + const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize) + + a = a >> ((index * bits.UintSize) % 64) + a &= filter + + if uint64(wI) != a { + t.Error("Bignum mismatch: disagreement on word", index) + } +} + +func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) { + + var modulus big.Int + var aIntMod big.Int + modulus.SetInt64(1) + modulus.Lsh(&modulus, (Limbs+1)*64) + aIntMod.Mod(aInt, &modulus) + + words := aIntMod.Bits() + + const steps = 64 / bits.UintSize + for i := 0; i < Limbs*steps; i++ { + assertMatch(t, words, z[i/steps], i) + } + + for i := 0; i < steps; i++ { + assertMatch(t, words, aHi, Limbs*steps+i) + } +} + +func approximateRef(x *Element) uint64 { + + var asInt big.Int + x.ToBigInt(&asInt) + n := x.BitLen() + + if n <= 64 { + return asInt.Uint64() + } + + modulus := big.NewInt(1 << 31) + var lo big.Int + lo.Mod(&asInt, modulus) + + modulus.Lsh(modulus, uint(n-64)) + var hi big.Int + hi.Div(&asInt, modulus) + hi.Lsh(&hi, 31) + + hi.Add(&hi, &lo) + return hi.Uint64() +} diff --git a/ecc/bw6-756/fr/fft/doc.go b/ecc/bw6-756/fr/fft/doc.go new file mode 100644 index 000000000..3c35170e8 --- /dev/null +++ b/ecc/bw6-756/fr/fft/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fft provides in-place discrete Fourier transform. +package fft diff --git a/ecc/bw6-756/fr/fft/domain.go b/ecc/bw6-756/fr/fft/domain.go new file mode 100644 index 000000000..dc31f8246 --- /dev/null +++ b/ecc/bw6-756/fr/fft/domain.go @@ -0,0 +1,300 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "fmt" + "io" + "math/big" + "math/bits" + "runtime" + "sync" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + + curve "github.com/consensys/gnark-crypto/ecc/bw6-756" + + "github.com/consensys/gnark-crypto/ecc" +) + +// Domain with a power of 2 cardinality +// compute a field element of order 2x and store it in FinerGenerator +// all other values can be derived from x, GeneratorSqrt +type Domain struct { + Cardinality uint64 + Depth uint64 + PrecomputeReversedTable uint64 // uint64 so it is recognized by the decoder from gnark-crypto + CardinalityInv fr.Element + Generator fr.Element + GeneratorInv fr.Element + FinerGenerator fr.Element + FinerGeneratorInv fr.Element + + // the following slices are not serialized and are (re)computed through domain.preComputeTwiddles() + + // Twiddles factor for the FFT using Generator for each stage of the recursive FFT + Twiddles [][]fr.Element + + // Twiddles factor for the FFT using GeneratorInv for each stage of the 
recursive FFT + TwiddlesInv [][]fr.Element + + // we precompute these mostly to avoid the memory intensive bit reverse permutation in the groth16.Prover + + // CosetTable[i][j] = domain.Generator(i-th)Sqrt ^ j + // CosetTable = fft.BitReverse(CosetTable) + CosetTable [][]fr.Element + CosetTableReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain + + // CosetTable[i][j] = domain.Generator(i-th)SqrtInv ^ j + // CosetTableInv = fft.BitReverse(CosetTableInv) + CosetTableInv [][]fr.Element + CosetTableInvReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain +} + +// NewDomain returns a subgroup with a power of 2 cardinality +// cardinality >= m +// If depth>0, the Domain will also store a primitive (2**depth)*m root +// of 1, with associated precomputed data. This allows to perform shifted +// FFT/FFTInv. +// If precomputeReversedCosetTable is set, the bit reversed cosetTable/cosetTableInv are precomputed. +// +// example: +// -------- +// +// * NewDomain(m, 0, false) outputs a new domain to perform the fft on Z/mZ. +// * NewDomain(m, 2, false) outputs a new domain to perform fft on Z/mZ, plus a primitive +// 2**2*m=4m-th root of 1 and associated data to compute fft/fftinv on the cosets of +// (Z/4mZ)/(Z/mZ). 
+func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { + + // generator of the largest 2-adic subgroup + var rootOfUnity fr.Element + + rootOfUnity.SetString("199251335866470442271346949249090720992237796757894062992204115206570647302191425225605716521843542790404563904580") + const maxOrderRoot uint64 = 41 + + domain := &Domain{} + x := ecc.NextPowerOfTwo(m) + domain.Cardinality = uint64(x) + domain.Depth = depth + if precomputeReversedTable { + domain.PrecomputeReversedTable = 1 + } + + // find generator for Z/2^(log(m))Z and Z/2^(log(m)+cosets)Z + logx := uint64(bits.TrailingZeros64(x)) + if logx > maxOrderRoot { + panic(fmt.Sprintf("m (%d) is too big: the required root of unity does not exist", m)) + } + logGen := logx + depth + if logGen > maxOrderRoot { + panic("log(m) + cosets is too big: the required root of unity does not exist") + } + + expo := uint64(1 << (maxOrderRoot - logGen)) + bExpo := new(big.Int).SetUint64(expo) + domain.FinerGenerator.Exp(rootOfUnity, bExpo) + domain.FinerGeneratorInv.Inverse(&domain.FinerGenerator) + + // Generator = FinerGenerator^2 has order x + expo = uint64(1 << (maxOrderRoot - logx)) + bExpo.SetUint64(expo) + domain.Generator.Exp(rootOfUnity, bExpo) // order x + domain.GeneratorInv.Inverse(&domain.Generator) + domain.CardinalityInv.SetUint64(uint64(x)).Inverse(&domain.CardinalityInv) + + // twiddle factors + domain.preComputeTwiddles() + + // store the bit reversed coset tables if needed + if depth > 0 && precomputeReversedTable { + domain.reverseCosetTables() + } + + return domain +} + +func (d *Domain) reverseCosetTables() { + nbCosets := (1 << d.Depth) - 1 + d.CosetTableReversed = make([][]fr.Element, nbCosets) + d.CosetTableInvReversed = make([][]fr.Element, nbCosets) + for i := 0; i < nbCosets; i++ { + d.CosetTableReversed[i] = make([]fr.Element, d.Cardinality) + d.CosetTableInvReversed[i] = make([]fr.Element, d.Cardinality) + copy(d.CosetTableReversed[i], d.CosetTable[i]) + 
copy(d.CosetTableInvReversed[i], d.CosetTableInv[i]) + BitReverse(d.CosetTableReversed[i]) + BitReverse(d.CosetTableInvReversed[i]) + } +} + +func (d *Domain) preComputeTwiddles() { + + // nb fft stages + nbStages := uint64(bits.TrailingZeros64(d.Cardinality)) + nbCosets := (1 << d.Depth) - 1 + + d.Twiddles = make([][]fr.Element, nbStages) + d.TwiddlesInv = make([][]fr.Element, nbStages) + d.CosetTable = make([][]fr.Element, nbCosets) + d.CosetTableInv = make([][]fr.Element, nbCosets) + for i := 0; i < nbCosets; i++ { + d.CosetTable[i] = make([]fr.Element, d.Cardinality) + d.CosetTableInv[i] = make([]fr.Element, d.Cardinality) + } + + var wg sync.WaitGroup + + // for each fft stage, we pre compute the twiddle factors + twiddles := func(t [][]fr.Element, omega fr.Element) { + for i := uint64(0); i < nbStages; i++ { + t[i] = make([]fr.Element, 1+(1<<(nbStages-i-1))) + var w fr.Element + if i == 0 { + w = omega + } else { + w = t[i-1][2] + } + t[i][0] = fr.One() + t[i][1] = w + for j := 2; j < len(t[i]); j++ { + t[i][j].Mul(&t[i][j-1], &w) + } + } + wg.Done() + } + + expTable := func(sqrt fr.Element, t []fr.Element) { + t[0] = fr.One() + precomputeExpTable(sqrt, t) + wg.Done() + } + + if nbCosets > 0 { + cosetGens := make([]fr.Element, nbCosets) + cosetGensInv := make([]fr.Element, nbCosets) + cosetGens[0].Set(&d.FinerGenerator) + cosetGensInv[0].Set(&d.FinerGeneratorInv) + for i := 1; i < nbCosets; i++ { + cosetGens[i].Mul(&cosetGens[i-1], &d.FinerGenerator) + cosetGensInv[i].Mul(&cosetGensInv[i-1], &d.FinerGeneratorInv) + } + wg.Add(2 + 2*nbCosets) + go twiddles(d.Twiddles, d.Generator) + go twiddles(d.TwiddlesInv, d.GeneratorInv) + for i := 0; i < nbCosets-1; i++ { + go expTable(cosetGens[i], d.CosetTable[i]) + go expTable(cosetGensInv[i], d.CosetTableInv[i]) + } + go expTable(cosetGens[nbCosets-1], d.CosetTable[nbCosets-1]) + expTable(cosetGensInv[nbCosets-1], d.CosetTableInv[nbCosets-1]) + + wg.Wait() + + } else { + wg.Add(2) + go twiddles(d.Twiddles, 
d.Generator) + twiddles(d.TwiddlesInv, d.GeneratorInv) + wg.Wait() + } + +} + +func precomputeExpTable(w fr.Element, table []fr.Element) { + n := len(table) + + // see if it makes sense to parallelize exp tables pre-computation + interval := 0 + if runtime.NumCPU() >= 4 { + interval = (n - 1) / (runtime.NumCPU() / 4) + } + + // this ratio roughly correspond to the number of multiplication one can do in place of a Exp operation + const ratioExpMul = 6000 / 17 + + if interval < ratioExpMul { + precomputeExpTableChunk(w, 1, table[1:]) + return + } + + // we parallelize + var wg sync.WaitGroup + for i := 1; i < n; i += interval { + start := i + end := i + interval + if end > n { + end = n + } + wg.Add(1) + go func() { + precomputeExpTableChunk(w, uint64(start), table[start:end]) + wg.Done() + }() + } + wg.Wait() +} + +func precomputeExpTableChunk(w fr.Element, power uint64, table []fr.Element) { + + // this condition ensures that creating a domain of size 1 with cosets don't fail + if len(table) > 0 { + table[0].Exp(w, new(big.Int).SetUint64(power)) + for i := 1; i < len(table); i++ { + table[i].Mul(&table[i-1], &w) + } + } +} + +// WriteTo writes a binary representation of the domain (without the precomputed twiddle factors) +// to the provided writer +func (d *Domain) WriteTo(w io.Writer) (int64, error) { + + enc := curve.NewEncoder(w) + + toEncode := []interface{}{d.Cardinality, d.Depth, d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv} + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom attempts to decode a domain from Reader +func (d *Domain) ReadFrom(r io.Reader) (int64, error) { + + dec := curve.NewDecoder(r) + + toDecode := []interface{}{&d.Cardinality, &d.Depth, &d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv} + 
+ for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + d.preComputeTwiddles() + + // store the bit reversed coset tables if needed + if d.Depth > 0 && d.PrecomputeReversedTable == 1 { + d.reverseCosetTables() + } + + return dec.BytesRead(), nil +} diff --git a/ecc/bw6-756/fr/fft/domain_test.go b/ecc/bw6-756/fr/fft/domain_test.go new file mode 100644 index 000000000..df72f0e3a --- /dev/null +++ b/ecc/bw6-756/fr/fft/domain_test.go @@ -0,0 +1,47 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "bytes" + "reflect" + "testing" +) + +func TestDomainSerialization(t *testing.T) { + + domain := NewDomain(1<<6, 1, true) + var reconstructed Domain + + var buf bytes.Buffer + written, err := domain.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + var read int64 + read, err = reconstructed.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + if written != read { + t.Fatal("didn't read as many bytes as we wrote") + } + if !reflect.DeepEqual(domain, &reconstructed) { + t.Fatal("Domain.SetBytes(Bytes()) failed") + } +} diff --git a/ecc/bw6-756/fr/fft/fft.go b/ecc/bw6-756/fr/fft/fft.go new file mode 100644 index 000000000..503f375ba --- /dev/null +++ b/ecc/bw6-756/fr/fft/fft.go @@ -0,0 +1,319 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/bits" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// Decimation is used in the FFT call to select decimation in time or in frequency +type Decimation uint8 + +const ( + DIT Decimation = iota + DIF +) + +// parallelize threshold for a single butterfly op, if the fft stage is not parallelized already +const butterflyThreshold = 16 + +// FFT computes (recursively) the discrete Fourier transform of a and stores the result in a +// if decimation == DIT (decimation in time), the input must be in bit-reversed order +// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order +// coset sets the shift of the fft (0 = no shift, standard fft) +// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. 
+// +// example: +// ------- +// domain := NewDomain(m, 2) --> contains precomputed data for Z/mZ, and Z/4mZ +// FFT(pol, DIT, 1) --> evaluates pol on the coset 1 in (Z/4mZ)/(Z/mZ) +func (domain *Domain) FFT(a []fr.Element, decimation Decimation, coset uint64) { + + numCPU := uint64(runtime.NumCPU()) + + // if coset != 0, scale by coset table + if coset != 0 { + scale := func(cosetTable []fr.Element) { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &cosetTable[i]) + } + }) + } + if decimation == DIT { + if domain.PrecomputeReversedTable == 0 { + // no precomputed coset, we adjust the index of the coset table + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + irev := bits.Reverse64(uint64(i)) >> nn + a[i].Mul(&a[i], &domain.CosetTable[coset-1][int(irev)]) + } + }) + } else { + scale(domain.CosetTableReversed[coset-1]) + } + } else { + scale(domain.CosetTable[coset-1]) + } + } + + // find the stage where we should stop spawning go routines in our recursive calls + // (ie when we have as many go routines running as we have available CPUs) + maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU)) + if numCPU <= 1 { + maxSplits = -1 + } + + switch decimation { + case DIF: + difFFT(a, domain.Twiddles, 0, maxSplits, nil) + case DIT: + ditFFT(a, domain.Twiddles, 0, maxSplits, nil) + default: + panic("not implemented") + } +} + +// FFTInverse computes (recursively) the inverse discrete Fourier transform of a and stores the result in a +// if decimation == DIT (decimation in time), the input must be in bit-reversed order +// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order +// coset sets the shift of the fft (0 = no shift, standard fft) +// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. 
+func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, coset uint64) { + + numCPU := uint64(runtime.NumCPU()) + + // find the stage where we should stop spawning go routines in our recursive calls + // (ie when we have as many go routines running as we have available CPUs) + maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU)) + if numCPU <= 1 { + maxSplits = -1 + } + switch decimation { + case DIF: + difFFT(a, domain.TwiddlesInv, 0, maxSplits, nil) + case DIT: + ditFFT(a, domain.TwiddlesInv, 0, maxSplits, nil) + default: + panic("not implemented") + } + + // scale by CardinalityInv (+ cosetTableInv is coset!=0) + if coset == 0 { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &domain.CardinalityInv) + } + }) + return + } + + scale := func(cosetTable []fr.Element) { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &cosetTable[i]). + Mul(&a[i], &domain.CardinalityInv) + } + }) + } + if decimation == DIT { + scale(domain.CosetTableInv[coset-1]) + return + } + + // decimation == DIF + if domain.PrecomputeReversedTable != 0 { + scale(domain.CosetTableInvReversed[coset-1]) + return + } + + // no precomputed coset, we adjust the index of the coset table + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + irev := bits.Reverse64(uint64(i)) >> nn + a[i].Mul(&a[i], &domain.CosetTableInv[coset-1][int(irev)]). 
+ Mul(&a[i], &domain.CardinalityInv) + } + }) + +} + +func difFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) { + if chDone != nil { + defer close(chDone) + } + + n := len(a) + if n == 1 { + return + } else if n == 8 { + kerDIF8(a, twiddles, stage) + return + } + m := n >> 1 + + // if stage < maxSplits, we parallelize this butterfly + // but we have only numCPU / stage cpus available + if (m > butterflyThreshold) && (stage < maxSplits) { + // 1 << stage == estimated used CPUs + numCPU := runtime.NumCPU() / (1 << (stage)) + parallel.Execute(m, func(start, end int) { + for i := start; i < end; i++ { + fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[stage][i]) + } + }, numCPU) + } else { + // i == 0 + fr.Butterfly(&a[0], &a[m]) + for i := 1; i < m; i++ { + fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[stage][i]) + } + } + + if m == 1 { + return + } + + nextStage := stage + 1 + if stage < maxSplits { + chDone := make(chan struct{}, 1) + go difFFT(a[m:n], twiddles, nextStage, maxSplits, chDone) + difFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + <-chDone + } else { + difFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + difFFT(a[m:n], twiddles, nextStage, maxSplits, nil) + } + +} + +func ditFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) { + if chDone != nil { + defer close(chDone) + } + n := len(a) + if n == 1 { + return + } else if n == 8 { + kerDIT8(a, twiddles, stage) + return + } + m := n >> 1 + + nextStage := stage + 1 + + if stage < maxSplits { + // that's the only time we fire go routines + chDone := make(chan struct{}, 1) + go ditFFT(a[m:], twiddles, nextStage, maxSplits, chDone) + ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + <-chDone + } else { + ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + ditFFT(a[m:n], twiddles, nextStage, maxSplits, nil) + + } + + // if stage < maxSplits, we parallelize this butterfly + // but we have only 
numCPU / stage cpus available + if (m > butterflyThreshold) && (stage < maxSplits) { + // 1 << stage == estimated used CPUs + numCPU := runtime.NumCPU() / (1 << (stage)) + parallel.Execute(m, func(start, end int) { + for k := start; k < end; k++ { + a[k+m].Mul(&a[k+m], &twiddles[stage][k]) + fr.Butterfly(&a[k], &a[k+m]) + } + }, numCPU) + + } else { + fr.Butterfly(&a[0], &a[m]) + for k := 1; k < m; k++ { + a[k+m].Mul(&a[k+m], &twiddles[stage][k]) + fr.Butterfly(&a[k], &a[k+m]) + } + } +} + +// BitReverse applies the bit-reversal permutation to a. +// len(a) must be a power of 2 (as in every single function in this file) +func BitReverse(a []fr.Element) { + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + + for i := uint64(0); i < n; i++ { + irev := bits.Reverse64(i) >> nn + if irev > i { + a[i], a[irev] = a[irev], a[i] + } + } +} + +// kerDIT8 is a kernel that process a FFT of size 8 +func kerDIT8(a []fr.Element, twiddles [][]fr.Element, stage int) { + + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[0], &a[2]) + a[3].Mul(&a[3], &twiddles[stage+1][1]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + a[7].Mul(&a[7], &twiddles[stage+1][1]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[0], &a[4]) + a[5].Mul(&a[5], &twiddles[stage+0][1]) + fr.Butterfly(&a[1], &a[5]) + a[6].Mul(&a[6], &twiddles[stage+0][2]) + fr.Butterfly(&a[2], &a[6]) + a[7].Mul(&a[7], &twiddles[stage+0][3]) + fr.Butterfly(&a[3], &a[7]) +} + +// kerDIF8 is a kernel that process a FFT of size 8 +func kerDIF8(a []fr.Element, twiddles [][]fr.Element, stage int) { + + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + a[5].Mul(&a[5], &twiddles[stage+0][1]) + a[6].Mul(&a[6], &twiddles[stage+0][2]) + a[7].Mul(&a[7], &twiddles[stage+0][3]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + 
fr.Butterfly(&a[5], &a[7]) + a[3].Mul(&a[3], &twiddles[stage+1][1]) + a[7].Mul(&a[7], &twiddles[stage+1][1]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) +} diff --git a/ecc/bw6-756/fr/fft/fft_test.go b/ecc/bw6-756/fr/fft/fft_test.go new file mode 100644 index 000000000..4748e01b9 --- /dev/null +++ b/ecc/bw6-756/fr/fft/fft_test.go @@ -0,0 +1,415 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/big" + "strconv" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" +) + +func TestFFT(t *testing.T) { + const maxSize = 1 << 10 + + nbCosets := 3 + domainWithPrecompute := NewDomain(maxSize, 2, true) + domainWOPrecompute := NewDomain(maxSize, 2, false) + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 5 + + properties := gopter.NewProperties(parameters) + + properties.Property("DIF FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 0) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets with precomputed values should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). 
+ Mul(&sample, &domainWithPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets W/O precompute should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWOPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). + Mul(&sample, &domainWOPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIT FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + domainWithPrecompute.FFTInverse(pol, DIF, 0) + 
BitReverse(pol) + + check := true + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + return check + }, + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, uint64(i)) + domainWithPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWOPrecompute.FFT(pol, DIT, uint64(i)) + domainWOPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 0) + domainWithPrecompute.FFT(pol, DIT, 0) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + 
backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 1) + domainWithPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFTInverse(pol, DIF, 1) + domainWOPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// -------------------------------------------------------------------- +// benches +func BenchmarkBitReverse(b *testing.B) { + + const maxSize = 1 << 20 + + pol := make([]fr.Element, maxSize) + pol[0].SetRandom() + for i := 1; i < maxSize; i++ { + pol[i] = pol[i-1] + } + + for i := 8; i < 20; i++ { + b.Run("bit reversing 2**"+strconv.Itoa(i)+"bits", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + BitReverse(pol[:1< (1 << 15) { + size = 1 << 15 + } + paddedSize := ecc.NextPowerOfTwo(uint64(size)) + p1 := make([]fr.Element, paddedSize) + p2 := make([]fr.Element, paddedSize) + for i := 0; i < len(p1); i++ { + p1[i].SetRawBytes(r) + } + copy(p2, p1) + + // fft domain + nbCosets := uint64(uint8(data[0]) % 3) + domainWithPrecompute := NewDomain(paddedSize, nbCosets, true) + domainWOPrecompute := NewDomain(paddedSize, nbCosets, false) + + // bitReverse(DIF FFT(DIT FFT (bitReverse))))==id + for i := uint64(0); i < nbCosets; i++ { + BitReverse(p1) + domainWithPrecompute.FFT(p1, DIT, i) + domainWOPrecompute.FFTInverse(p1, DIF, i) + BitReverse(p1) + + for i := 0; i < 
len(p1); i++ { + if !p1[i].Equal(&p2[i]) { + panic(fmt.Sprintf("bitReverse(DIF FFT(DIT FFT (bitReverse)))) != id, size %d", size)) + } + } + } + + return fuzzNormal +} diff --git a/ecc/bw6-756/fr/fft/fuzz_test.go b/ecc/bw6-756/fr/fft/fuzz_test.go new file mode 100644 index 000000000..9890547c0 --- /dev/null +++ b/ecc/bw6-756/fr/fft/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bw6-756/fr/kzg/doc.go b/ecc/bw6-756/fr/kzg/doc.go new file mode 100644 index 000000000..d8a77e8f6 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package kzg provides a KZG commitment scheme. +package kzg diff --git a/ecc/bw6-756/fr/kzg/fuzz.go b/ecc/bw6-756/fr/kzg/fuzz.go new file mode 100644 index 000000000..de1704a8a --- /dev/null +++ b/ecc/bw6-756/fr/kzg/fuzz.go @@ -0,0 +1,84 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + if len(data) == 0 { + return fuzzDiscard + } + size := int(uint8(data[0])) + 2 // TODO fix min size in NewScheme + if size > (1 << 15) { + size = 1 << 15 + } + r := bytes.NewReader(data[1:]) + var alpha, point fr.Element + alpha.SetRawBytes(r) + point.SetRawBytes(r) + s := NewScheme(size, alpha) + + // create polynomials + f := make([]polynomial.Polynomial, size/2) + for i := 0; i < len(f); i++ { + f[i] = make(polynomial.Polynomial, size) + for j := 0; j < len(f[i]); j++ { + f[i][j].SetRawBytes(r) + } + } + + // commit the polynomials + digests := make([]Digest, size/2) + for i := 0; i < len(digests); i++ { + digests[i], _ = s.Commit(f[i]) + + } + + proof, err := s.BatchOpenSinglePoint(&point, digests, f) + if err != nil { + panic(err) + } + + // verify the claimed values + for i := 0; i < len(f); i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + panic("inconsistant claimed values") + } + } + + // verify correct proof + err = s.BatchVerifySinglePoint(digests, &proof) + if err != nil { + panic(err) + } + + return fuzzNormal +} diff --git a/ecc/bw6-756/fr/kzg/fuzz_test.go b/ecc/bw6-756/fr/kzg/fuzz_test.go new file mode 100644 index 000000000..8379a59c7 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bw6-756/fr/kzg/kzg.go b/ecc/bw6-756/fr/kzg/kzg.go new file mode 100644 index 000000000..0ff86eff1 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/kzg.go @@ -0,0 +1,518 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "errors" + "hash" + "math/big" + "sync" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" + "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrInvalidNbDigests = errors.New("number of digests is not the same as the number of polynomials") + ErrInvalidPolynomialSize = errors.New("invalid polynomial size (larger than SRS or == 0)") + ErrVerifyOpeningProof = errors.New("can't verify opening proof") + ErrVerifyBatchOpeningSinglePoint = errors.New("can't verify batch opening proof at single point") + ErrMinSRSSize = errors.New("minimum srs size is 2") +) + +// Digest commitment of a polynomial. +type Digest = bw6756.G1Affine + +// SRS stores the result of the MPC +type SRS struct { + G1 []bw6756.G1Affine // [gen [alpha]gen , [alpha**2]gen, ... ] + G2 [2]bw6756.G2Affine // [gen, [alpha]gen ] +} + +// NewSRS returns a new SRS using alpha as randomness source +// +// In production, a SRS generated through MPC should be used. 
+// +// implements io.ReaderFrom and io.WriterTo +func NewSRS(size uint64, bAlpha *big.Int) (*SRS, error) { + if size < 2 { + return nil, ErrMinSRSSize + } + var srs SRS + srs.G1 = make([]bw6756.G1Affine, size) + + var alpha fr.Element + alpha.SetBigInt(bAlpha) + + _, _, gen1Aff, gen2Aff := bw6756.Generators() + srs.G1[0] = gen1Aff + srs.G2[0] = gen2Aff + srs.G2[1].ScalarMultiplication(&gen2Aff, bAlpha) + + alphas := make([]fr.Element, size-1) + alphas[0] = alpha + for i := 1; i < len(alphas); i++ { + alphas[i].Mul(&alphas[i-1], &alpha) + } + for i := 0; i < len(alphas); i++ { + alphas[i].FromMont() + } + g1s := bw6756.BatchScalarMultiplicationG1(&gen1Aff, alphas) + copy(srs.G1[1:], g1s) + + return &srs, nil +} + +// OpeningProof KZG proof for opening at a single point. +// +// implements io.ReaderFrom and io.WriterTo +type OpeningProof struct { + // H quotient polynomial (f - f(z))/(x-z) + H bw6756.G1Affine + + // Point at which the polynomial is evaluated + Point fr.Element + + // ClaimedValue purported value + ClaimedValue fr.Element +} + +// BatchOpeningProof opening proof for many polynomials at the same point +// +// implements io.ReaderFrom and io.WriterTo +type BatchOpeningProof struct { + // H quotient polynomial Sum_i gamma**i*(f - f(z))/(x-z) + H bw6756.G1Affine + + // Point at which the polynomials are evaluated + Point fr.Element + + // ClaimedValues purported values + ClaimedValues []fr.Element +} + +// Commit commits to a polynomial using a multi exponentiation with the SRS. +// It is assumed that the polynomial is in canonical form, in Montgomery form. 
+func Commit(p polynomial.Polynomial, srs *SRS, nbTasks ...int) (Digest, error) { + + if len(p) == 0 || len(p) > len(srs.G1) { + return Digest{}, ErrInvalidPolynomialSize + } + + var res bw6756.G1Affine + + config := ecc.MultiExpConfig{ScalarsMont: true} + if len(nbTasks) > 0 { + config.NbTasks = nbTasks[0] + } + if _, err := res.MultiExp(srs.G1[:len(p)], p, config); err != nil { + return Digest{}, err + } + + return res, nil +} + +// Open computes an opening proof of polynomial p at given point. +// fft.Domain Cardinality must be larger than p.Degree() +func Open(p polynomial.Polynomial, point *fr.Element, domain *fft.Domain, srs *SRS) (OpeningProof, error) { + if len(p) == 0 || len(p) > len(srs.G1) { + return OpeningProof{}, ErrInvalidPolynomialSize + } + + // build the proof + res := OpeningProof{ + Point: *point, + ClaimedValue: p.Eval(point), + } + + // compute H + _p := make(polynomial.Polynomial, len(p)) + copy(_p, p) + h := dividePolyByXminusA(_p, res.ClaimedValue, res.Point) + + _p = nil // h re-use this memory + + // commit to H + hCommit, err := Commit(h, srs) + if err != nil { + return OpeningProof{}, err + } + res.H.Set(&hCommit) + + return res, nil +} + +// Verify verifies a KZG opening proof at a single point +func Verify(commitment *Digest, proof *OpeningProof, srs *SRS) error { + + // comm(f(a)) + var claimedValueG1Aff bw6756.G1Affine + var claimedValueBigInt big.Int + proof.ClaimedValue.ToBigIntRegular(&claimedValueBigInt) + claimedValueG1Aff.ScalarMultiplication(&srs.G1[0], &claimedValueBigInt) + + // [f(alpha) - f(a)]G1Jac + var fminusfaG1Jac, tmpG1Jac bw6756.G1Jac + fminusfaG1Jac.FromAffine(commitment) + tmpG1Jac.FromAffine(&claimedValueG1Aff) + fminusfaG1Jac.SubAssign(&tmpG1Jac) + + // [-H(alpha)]G1Aff + var negH bw6756.G1Affine + negH.Neg(&proof.H) + + // [alpha-a]G2Jac + var alphaMinusaG2Jac, genG2Jac, alphaG2Jac bw6756.G2Jac + var pointBigInt big.Int + proof.Point.ToBigIntRegular(&pointBigInt) + genG2Jac.FromAffine(&srs.G2[0]) + 
alphaG2Jac.FromAffine(&srs.G2[1]) + alphaMinusaG2Jac.ScalarMultiplication(&genG2Jac, &pointBigInt). + Neg(&alphaMinusaG2Jac). + AddAssign(&alphaG2Jac) + + // [alpha-a]G2Aff + var xminusaG2Aff bw6756.G2Affine + xminusaG2Aff.FromJacobian(&alphaMinusaG2Jac) + + // [f(alpha) - f(a)]G1Aff + var fminusfaG1Aff bw6756.G1Affine + fminusfaG1Aff.FromJacobian(&fminusfaG1Jac) + + // e([-H(alpha)]G1Aff, G2gen).e([-H(alpha)]G1Aff, [alpha-a]G2Aff) ==? 1 + check, err := bw6756.PairingCheck( + []bw6756.G1Affine{fminusfaG1Aff, negH}, + []bw6756.G2Affine{srs.G2[0], xminusaG2Aff}, + ) + if err != nil { + return err + } + if !check { + return ErrVerifyOpeningProof + } + return nil +} + +// BatchOpenSinglePoint creates a batch opening proof at _val of a list of polynomials. +// It's an interactive protocol, made non interactive using Fiat Shamir. +// point is the point at which the polynomials are opened. +// digests is the list of committed polynomials to open, need to derive the challenge using Fiat Shamir. +// polynomials is the list of polynomials to open. 
+func BatchOpenSinglePoint(polynomials []polynomial.Polynomial, digests []Digest, point *fr.Element, hf hash.Hash, domain *fft.Domain, srs *SRS) (BatchOpeningProof, error) { + + // check for invalid sizes + nbDigests := len(digests) + if nbDigests != len(polynomials) { + return BatchOpeningProof{}, ErrInvalidNbDigests + } + largestPoly := -1 + for _, p := range polynomials { + if len(p) == 0 || len(p) > len(srs.G1) { + return BatchOpeningProof{}, ErrInvalidPolynomialSize + } + if len(p) > largestPoly { + largestPoly = len(p) + } + } + + var res BatchOpeningProof + + // compute the purported values + res.ClaimedValues = make([]fr.Element, len(polynomials)) + var wg sync.WaitGroup + wg.Add(len(polynomials)) + for i := 0; i < len(polynomials); i++ { + go func(at int) { + res.ClaimedValues[at] = polynomials[at].Eval(point) + wg.Done() + }(i) + } + + // set the point at which the evaluation is done + res.Point = *point + + // derive the challenge gamma, binded to the point and the commitments + gamma, err := deriveGamma(res.Point, digests, hf) + if err != nil { + return BatchOpeningProof{}, err + } + + // compute sum_i gamma**i*f(a) + var sumGammaiTimesEval fr.Element + chSumGammai := make(chan struct{}, 1) + go func() { + // wait for polynomial evaluations to be completed (res.ClaimedValues) + wg.Wait() + sumGammaiTimesEval = res.ClaimedValues[nbDigests-1] + for i := nbDigests - 2; i >= 0; i-- { + sumGammaiTimesEval.Mul(&sumGammaiTimesEval, &gamma). + Add(&sumGammaiTimesEval, &res.ClaimedValues[i]) + } + close(chSumGammai) + }() + + // compute sum_i gamma**i*f + // that is p0 + gamma * p1 + gamma^2 * p2 + ... 
gamma^n * pn + // note: if we are willing to paralellize that, we could clone the poly and scale them by + // gamma n in parallel, before reducing into sumGammaiTimesPol + sumGammaiTimesPol := make(polynomial.Polynomial, largestPoly) + copy(sumGammaiTimesPol, polynomials[0]) + gammaN := gamma + var pj fr.Element + for i := 1; i < len(polynomials); i++ { + for j := 0; j < len(polynomials[i]); j++ { + pj.Mul(&polynomials[i][j], &gammaN) + sumGammaiTimesPol[j].Add(&sumGammaiTimesPol[j], &pj) + } + gammaN.Mul(&gammaN, &gamma) + } + + // compute H + <-chSumGammai + h := dividePolyByXminusA(sumGammaiTimesPol, sumGammaiTimesEval, res.Point) + sumGammaiTimesPol = nil // same memory as h + + res.H, err = Commit(h, srs) + if err != nil { + return BatchOpeningProof{}, err + } + + return res, nil +} + +// FoldProof fold the digests and the proofs in batchOpeningProof using Fiat Shamir +// to obtain an opening proof at a single point. +// +// * digests list of digests on which batchOpeningProof is based +// * batchOpeningProof opening proof of digests +// * returns the folded version of batchOpeningProof, Digest, the folded version of digests +func FoldProof(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash) (OpeningProof, Digest, error) { + + nbDigests := len(digests) + + // check consistancy between numbers of claims vs number of digests + if nbDigests != len(batchOpeningProof.ClaimedValues) { + return OpeningProof{}, Digest{}, ErrInvalidNbDigests + } + + // derive the challenge gamma, binded to the point and the commitments + gamma, err := deriveGamma(batchOpeningProof.Point, digests, hf) + if err != nil { + return OpeningProof{}, Digest{}, ErrInvalidNbDigests + } + + // fold the claimed values and digests + gammai := make([]fr.Element, nbDigests) + gammai[0].SetOne() + for i := 1; i < nbDigests; i++ { + gammai[i].Mul(&gammai[i-1], &gamma) + } + foldedDigests, foldedEvaluations, err := fold(digests, batchOpeningProof.ClaimedValues, gammai) + if err != nil 
{ + return OpeningProof{}, Digest{}, err + } + + // create the folded opening proof + var res OpeningProof + res.ClaimedValue.Set(&foldedEvaluations) + res.H.Set(&batchOpeningProof.H) + res.Point.Set(&batchOpeningProof.Point) + + return res, foldedDigests, nil +} + +// BatchVerifySinglePoint verifies a batched opening proof at a single point of a list of polynomials. +// +// * digests list of digests on which opening proof is done +// * batchOpeningProof proof of correct opening on the digests +func BatchVerifySinglePoint(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash, srs *SRS) error { + + // fold the proof + foldedProof, foldedDigest, err := FoldProof(digests, batchOpeningProof, hf) + if err != nil { + return err + } + + // verify the foldedProof againts the foldedDigest + err = Verify(&foldedDigest, &foldedProof, srs) + return err + +} + +// BatchVerifyMultiPoints batch verifies a list of opening proofs at different points. +// The purpose of the batching is to have only one pairing for verifying several proofs. 
+// +// * digests list of committed polynomials which are opened +// * proofs list of opening proofs of the digest +func BatchVerifyMultiPoints(digests []Digest, proofs []OpeningProof, srs *SRS) error { + + // check consistancy nb proogs vs nb digests + if len(digests) != len(proofs) { + return ErrInvalidNbDigests + } + + // if only one digest, call Verify + if len(digests) == 1 { + return Verify(&digests[0], &proofs[0], srs) + } + + // sample random numbers for sampling + randomNumbers := make([]fr.Element, len(digests)) + randomNumbers[0].SetOne() + for i := 1; i < len(randomNumbers); i++ { + _, err := randomNumbers[i].SetRandom() + if err != nil { + return err + } + } + + // combine random_i*quotient_i + var foldedQuotients bw6756.G1Affine + quotients := make([]bw6756.G1Affine, len(proofs)) + for i := 0; i < len(randomNumbers); i++ { + quotients[i].Set(&proofs[i].H) + } + config := ecc.MultiExpConfig{ScalarsMont: true} + _, err := foldedQuotients.MultiExp(quotients, randomNumbers, config) + if err != nil { + return nil + } + + // fold digests and evals + evals := make([]fr.Element, len(digests)) + for i := 0; i < len(randomNumbers); i++ { + evals[i].Set(&proofs[i].ClaimedValue) + } + foldedDigests, foldedEvals, err := fold(digests, evals, randomNumbers) + if err != nil { + return err + } + + // compute commitment to folded Eval + var foldedEvalsCommit bw6756.G1Affine + var foldedEvalsBigInt big.Int + foldedEvals.ToBigIntRegular(&foldedEvalsBigInt) + foldedEvalsCommit.ScalarMultiplication(&srs.G1[0], &foldedEvalsBigInt) + + // compute F = foldedDigests - foldedEvalsCommit + foldedDigests.Sub(&foldedDigests, &foldedEvalsCommit) + + // combine random_i*(point_i*quotient_i) + var foldedPointsQuotients bw6756.G1Affine + for i := 0; i < len(randomNumbers); i++ { + randomNumbers[i].Mul(&randomNumbers[i], &proofs[i].Point) + } + _, err = foldedPointsQuotients.MultiExp(quotients, randomNumbers, config) + if err != nil { + return err + } + + // lhs first pairing + 
foldedDigests.Add(&foldedDigests, &foldedPointsQuotients) + + // lhs second pairing + foldedQuotients.Neg(&foldedQuotients) + + // pairing check + check, err := bw6756.PairingCheck( + []bw6756.G1Affine{foldedDigests, foldedQuotients}, + []bw6756.G2Affine{srs.G2[0], srs.G2[1]}, + ) + if err != nil { + return err + } + if !check { + return ErrVerifyOpeningProof + } + return nil + +} + +// fold folds digests and evaluations using the list of factors as random numbers. +// +// * digests list of digests to fold +// * evaluations list of evaluations to fold +// * factors list of multiplicative factors used for the folding (in Montgomery form) +func fold(digests []Digest, evaluations []fr.Element, factors []fr.Element) (Digest, fr.Element, error) { + + // length inconsistancy between digests and evaluations should have been done before calling this function + nbDigests := len(digests) + + // fold the claimed values + var foldedEvaluations, tmp fr.Element + for i := 0; i < nbDigests; i++ { + tmp.Mul(&evaluations[i], &factors[i]) + foldedEvaluations.Add(&foldedEvaluations, &tmp) + } + + // fold the digests + var foldedDigests Digest + _, err := foldedDigests.MultiExp(digests, factors, ecc.MultiExpConfig{ScalarsMont: true}) + if err != nil { + return foldedDigests, foldedEvaluations, err + } + + // folding done + return foldedDigests, foldedEvaluations, nil + +} + +// deriveGamma derives a challenge using Fiat Shamir to fold proofs. 
+func deriveGamma(point fr.Element, digests []Digest, hf hash.Hash) (fr.Element, error) { + + // derive the challenge gamma, binded to the point and the commitments + fs := fiatshamir.NewTranscript(hf, "gamma") + if err := fs.Bind("gamma", point.Marshal()); err != nil { + return fr.Element{}, err + } + for i := 0; i < len(digests); i++ { + if err := fs.Bind("gamma", digests[i].Marshal()); err != nil { + return fr.Element{}, err + } + } + gammaByte, err := fs.ComputeChallenge("gamma") + if err != nil { + return fr.Element{}, err + } + var gamma fr.Element + gamma.SetBytes(gammaByte) + + return gamma, nil +} + +// dividePolyByXminusA computes (f-f(a))/(x-a), in canonical basis, in regular form +// f memory is re-used for the result +func dividePolyByXminusA(f polynomial.Polynomial, fa, a fr.Element) polynomial.Polynomial { + + // first we compute f-f(a) + f[0].Sub(&f[0], &fa) + + // now we use syntetic division to divide by x-a + var t fr.Element + for i := len(f) - 2; i >= 0; i-- { + t.Mul(&f[i+1], &a) + + f[i].Add(&f[i], &t) + } + + // the result is of degree deg(f)-1 + return f[1:] +} diff --git a/ecc/bw6-756/fr/kzg/kzg_test.go b/ecc/bw6-756/fr/kzg/kzg_test.go new file mode 100644 index 000000000..9e0757166 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/kzg_test.go @@ -0,0 +1,453 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "crypto/sha256" + "math/big" + "reflect" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" +) + +// testSRS re-used accross tests of the KZG scheme +var testSRS *SRS + +func init() { + const srsSize = 230 + testSRS, _ = NewSRS(ecc.NextPowerOfTwo(srsSize), new(big.Int).SetInt64(42)) +} + +func TestDividePolyByXminusA(t *testing.T) { + + const pSize = 230 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + + // evaluate the polynomial at a random point + var point fr.Element + point.SetRandom() + evaluation := pol.Eval(&point) + + // probabilistic test (using Schwartz Zippel lemma, evaluation at one point is enough) + var randPoint, xminusa fr.Element + randPoint.SetRandom() + polRandpoint := pol.Eval(&randPoint) + polRandpoint.Sub(&polRandpoint, &evaluation) // f(rand)-f(point) + + // compute f-f(a)/x-a + h := dividePolyByXminusA(pol, evaluation, point) + pol = nil // h reuses this memory + + if len(h) != 229 { + t.Fatal("inconsistant size of quotient") + } + + hRandPoint := h.Eval(&randPoint) + xminusa.Sub(&randPoint, &point) // rand-point + + // f(rand)-f(point) ==? h(rand)*(rand-point) + hRandPoint.Mul(&hRandPoint, &xminusa) + + if !hRandPoint.Equal(&polRandpoint) { + t.Fatal("Error f-f(a)/x-a") + } +} + +func TestSerializationSRS(t *testing.T) { + + // create a SRS + srs, err := NewSRS(64, new(big.Int).SetInt64(42)) + if err != nil { + t.Fatal(err) + } + + // serialize it... 
+ var buf bytes.Buffer + _, err = srs.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + + // reconstruct the SRS + var _srs SRS + _, err = _srs.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + // compare + if !reflect.DeepEqual(srs, &_srs) { + t.Fatal("scheme serialization failed") + } + +} + +func TestCommit(t *testing.T) { + + // create a polynomial + f := make(polynomial.Polynomial, 60) + for i := 0; i < 60; i++ { + f[i].SetRandom() + } + + // commit using the method from KZG + _kzgCommit, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + var kzgCommit bw6756.G1Affine + kzgCommit.Unmarshal(_kzgCommit.Marshal()) + + // check commitment using manual commit + var x fr.Element + x.SetString("42") + fx := f.Eval(&x) + var fxbi big.Int + fx.ToBigIntRegular(&fxbi) + var manualCommit bw6756.G1Affine + manualCommit.Set(&testSRS.G1[0]) + manualCommit.ScalarMultiplication(&manualCommit, &fxbi) + + // compare both results + if !kzgCommit.Equal(&manualCommit) { + t.Fatal("error KZG commitment") + } + +} + +func TestVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create a polynomial + f := randomPolynomial(60) + + // commit the polynomial + digest, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := Open(f, &point, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed valued + expected := f.Eval(&point) + if !proof.ClaimedValue.Equal(&expected) { + t.Fatal("inconsistant claimed value") + } + + // verify correct proof + err = Verify(&digest, &proof, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValue.Double(&proof.ClaimedValue) + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } +} + +func TestBatchVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 0, 
false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + + } + + // pick a hash function + hf := sha256.New() + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := BatchOpenSinglePoint(f, digests, &point, hf, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed values + for i := 0; i < 10; i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + t.Fatal("inconsistant claimed values") + } + } + + // verify correct proof + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + +} + +func TestBatchVerifyMultiPoints(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + } + + // pick a hash function + hf := sha256.New() + + // compute 2 batch opening proofs at 2 random points + points := make([]fr.Element, 2) + batchProofs := make([]BatchOpeningProof, 2) + points[0].SetRandom() + batchProofs[0], _ = BatchOpenSinglePoint(f[:5], digests[:5], &points[0], hf, domain, testSRS) + points[1].SetRandom() + batchProofs[1], _ = BatchOpenSinglePoint(f[5:], digests[5:], &points[1], hf, domain, testSRS) + + // fold the 2 batch opening proofs + proofs := make([]OpeningProof, 2) + foldedDigests := make([]Digest, 2) + 
proofs[0], foldedDigests[0], _ = FoldProof(digests[:5], &batchProofs[0], hf) + proofs[1], foldedDigests[1], _ = FoldProof(digests[5:], &batchProofs[1], hf) + + // check that the individual batch proofs are correct + err := Verify(&foldedDigests[0], &proofs[0], testSRS) + if err != nil { + t.Fatal(err) + } + err = Verify(&foldedDigests[1], &proofs[1], testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify correct folded proofs + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify tampered folded proofs + proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } + +} + +const benchSize = 1 << 16 + +func BenchmarkKZGCommit(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // random polynomial + p := randomPolynomial(benchSize / 2) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Commit(p, benchSRS) + } +} + +func BenchmarkDivideByXMinusA(b *testing.B) { + const pSize = 1 << 22 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + var a, fa fr.Element + a.SetRandom() + fa.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dividePolyByXminusA(pol, fa, a) + pol = pol[:pSize] + pol[pSize-1] = pol[0] + } +} + +func BenchmarkKZGOpen(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Open(p, &r, domain, benchSRS) + } +} + +func BenchmarkKZGVerify(b *testing.B) { + benchSRS, err := 
NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // kzg scheme + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + // commit + comm, err := Commit(p, benchSRS) + if err != nil { + b.Fatal(err) + } + + // open + openingProof, err := Open(p, &r, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Verify(&comm, &openingProof, benchSRS) + } +} + +func BenchmarkKZGBatchOpen10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + } +} + +func BenchmarkKZGBatchVerify10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + proof, err := BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; 
i++ { + BatchVerifySinglePoint(commitments[:], &proof, hf, benchSRS) + } +} + +func randomPolynomial(size int) polynomial.Polynomial { + f := make(polynomial.Polynomial, size) + for i := 0; i < size; i++ { + f[i].SetRandom() + } + return f +} diff --git a/ecc/bw6-756/fr/kzg/marshal.go b/ecc/bw6-756/fr/kzg/marshal.go new file mode 100644 index 000000000..a79315f91 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/marshal.go @@ -0,0 +1,138 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "io" +) + +// WriteTo writes binary encoding of the SRS +func (srs *SRS) WriteTo(w io.Writer) (int64, error) { + // encode the SRS + enc := bw6756.NewEncoder(w) + + toEncode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + srs.G1, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes SRS data from reader. 
+func (srs *SRS) ReadFrom(r io.Reader) (int64, error) { + // decode the SRS + dec := bw6756.NewDecoder(r) + + toDecode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + &srs.G1, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a OpeningProof +func (proof *OpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bw6756.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes OpeningProof data from reader. +func (proof *OpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bw6756.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a BatchOpeningProof +func (proof *BatchOpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bw6756.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + proof.ClaimedValues, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes BatchOpeningProof data from reader. 
+func (proof *BatchOpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bw6756.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValues, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} diff --git a/ecc/bw6-756/fr/mimc/doc.go b/ecc/bw6-756/fr/mimc/doc.go new file mode 100644 index 000000000..497bd40a9 --- /dev/null +++ b/ecc/bw6-756/fr/mimc/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package mimc provides MiMC hash function using Miyaguchi–Preneel construction. +package mimc diff --git a/ecc/bw6-756/fr/mimc/fuzz.go b/ecc/bw6-756/fr/mimc/fuzz.go new file mode 100644 index 000000000..41b557cf3 --- /dev/null +++ b/ecc/bw6-756/fr/mimc/fuzz.go @@ -0,0 +1,34 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + var s []byte + h := NewMiMC(string(data)) + h.Write(data) + h.Sum(s) + return fuzzNormal +} diff --git a/ecc/bw6-756/fr/mimc/mimc.go b/ecc/bw6-756/fr/mimc/mimc.go new file mode 100644 index 000000000..e0f90b33b --- /dev/null +++ b/ecc/bw6-756/fr/mimc/mimc.go @@ -0,0 +1,174 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +import ( + "hash" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "golang.org/x/crypto/sha3" +) + +const mimcNbRounds = 91 + +// BlockSize size that mimc consumes +const BlockSize = fr.Bytes + +// Params constants for the mimc hash function +type Params []fr.Element + +// NewParams creates new mimc object +func NewParams(seed string) Params { + + // set the constants + res := make(Params, mimcNbRounds) + + rnd := sha3.Sum256([]byte(seed)) + value := new(big.Int).SetBytes(rnd[:]) + + for i := 0; i < mimcNbRounds; i++ { + rnd = sha3.Sum256(value.Bytes()) + value.SetBytes(rnd[:]) + res[i].SetBigInt(value) + } + + return res +} + +// digest represents the partial evaluation of the checksum +// along with the params of the mimc function +type digest struct { + Params Params + h fr.Element + data []byte // data to hash +} + +// NewMiMC returns a MiMCImpl object, pure-go reference implementation +func NewMiMC(seed string) hash.Hash { + d := new(digest) + params := NewParams(seed) + //d.Reset() + d.Params = params + d.Reset() + return d +} + +// Reset resets the Hash to its initial state. +func (d *digest) Reset() { + d.data = nil + d.h = fr.Element{0, 0, 0, 0} +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (d *digest) Sum(b []byte) []byte { + buffer := d.checksum() + d.data = nil // flush the data already hashed + hash := buffer.Bytes() + b = append(b, hash[:]...) + return b +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (d *digest) Size() int { + return BlockSize +} + +// BlockSize returns the number of bytes Sum will return. 
+func (d *digest) BlockSize() int { + return BlockSize +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// It never returns an error. +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + d.data = append(d.data, p...) + return +} + +// Hash hash using Miyaguchi–Preneel: +// https://en.wikipedia.org/wiki/One-way_compression_function +// The XOR operation is replaced by field addition, data is in Montgomery form +func (d *digest) checksum() fr.Element { + + var buffer [BlockSize]byte + var x fr.Element + + // if data size is not multiple of BlockSizes we padd: + // .. || 0xaf8 -> .. || 0x0000...0af8 + if len(d.data)%BlockSize != 0 { + q := len(d.data) / BlockSize + r := len(d.data) % BlockSize + sliceq := make([]byte, q*BlockSize) + copy(sliceq, d.data) + slicer := make([]byte, r) + copy(slicer, d.data[q*BlockSize:]) + sliceremainder := make([]byte, BlockSize-r) + d.data = append(sliceq, sliceremainder...) + d.data = append(d.data, slicer...) + } + + if len(d.data) == 0 { + d.data = make([]byte, 32) + } + + nbChunks := len(d.data) / BlockSize + + for i := 0; i < nbChunks; i++ { + copy(buffer[:], d.data[i*BlockSize:(i+1)*BlockSize]) + x.SetBytes(buffer[:]) + d.encrypt(x) + d.h.Add(&x, &d.h) + } + + return d.h +} + +// plain execution of a mimc run +// m: message +// k: encryption key +func (d *digest) encrypt(m fr.Element) { + + for i := 0; i < len(d.Params); i++ { + // m = (m+k+c)^5 + var tmp fr.Element + tmp.Add(&m, &d.h).Add(&tmp, &d.Params[i]) + m.Square(&tmp). + Square(&m). 
+ Mul(&m, &tmp) + } + m.Add(&m, &d.h) + d.h = m +} + +// Sum computes the mimc hash of msg from seed +func Sum(seed string, msg []byte) ([]byte, error) { + params := NewParams(seed) + var d digest + d.Params = params + if _, err := d.Write(msg); err != nil { + return nil, err + } + h := d.checksum() + bytes := h.Bytes() + return bytes[:], nil +} diff --git a/ecc/bw6-756/fr/permutation/doc.go b/ecc/bw6-756/fr/permutation/doc.go new file mode 100644 index 000000000..bdf98e6ca --- /dev/null +++ b/ecc/bw6-756/fr/permutation/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package permutation provides an API to build permutation proofs. +package permutation diff --git a/ecc/bw6-756/fr/permutation/permutation.go b/ecc/bw6-756/fr/permutation/permutation.go new file mode 100644 index 000000000..8deb3563b --- /dev/null +++ b/ecc/bw6-756/fr/permutation/permutation.go @@ -0,0 +1,361 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package permutation + +import ( + "crypto/sha256" + "errors" + "math/big" + "math/bits" + + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrIncompatibleSize = errors.New("t1 and t2 should be of the same size") + ErrSize = errors.New("t1 and t2 should be of size a power of 2") + ErrPermutationProof = errors.New("permutation proof verification failed") +) + +// Proof proof that the commitments of t1 and t2 come from +// the same vector but permuted. +type Proof struct { + + // size of the polynomials + size int + + // commitments of t1 & t2, the permuted vectors, and z, the accumulation + // polynomial + t1, t2, z kzg.Digest + + // commitment to the quotient polynomial + q kzg.Digest + + // opening proofs of t1, t2, z, q (in that order) + batchedProof kzg.BatchOpeningProof + + // shifted opening proof of z + shiftedProof kzg.OpeningProof +} + +// computeZ returns the accumulation polynomial in Lagrange basis. 
+func computeZ(lt1, lt2 []fr.Element, epsilon fr.Element) []fr.Element { + + s := len(lt1) + z := make([]fr.Element, s) + d := make([]fr.Element, s) + z[0].SetOne() + d[0].SetOne() + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + var t fr.Element + for i := 0; i < s-1; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + z[_ii].Mul(&z[_i], t.Sub(&epsilon, &lt1[i])) + d[i+1].Mul(&d[i], t.Sub(&epsilon, &lt2[i])) + } + d = fr.BatchInvert(d) + for i := 0; i < s-1; i++ { + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + z[_ii].Mul(&z[_ii], &d[i+1]) + } + + return z +} + +// computeH computes lt2*z(gx) - lt1*z +func computeH(lt1, lt2, lz []fr.Element, epsilon fr.Element) []fr.Element { + + s := len(lt1) + res := make([]fr.Element, s) + var a, b fr.Element + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + for i := 0; i < s; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + a.Sub(&epsilon, &lt2[_i]) + a.Mul(&lz[_ii], &a) + b.Sub(&epsilon, &lt1[_i]) + b.Mul(&lz[_i], &b) + res[_i].Sub(&a, &b) + } + return res +} + +// computeH0 computes L0 * (z-1) +func computeH0(lz []fr.Element, d *fft.Domain) []fr.Element { + + var tn, o, g fr.Element + s := len(lz) + tn.SetUint64(2). + Neg(&tn) + u := make([]fr.Element, s) + o.SetOne() + g.Set(&d.FinerGenerator) + for i := 0; i < s; i++ { + u[i].Sub(&g, &o) + g.Mul(&g, &d.Generator) + } + u = fr.BatchInvert(u) + res := make([]fr.Element, s) + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + for i := 0; i < s; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lz[_i], &o). + Mul(&res[_i], &u[i]). + Mul(&res[_i], &tn) + } + return res +} + +// Prove generates a proof that t1 and t2 are the same but permuted. +// The size of t1 and t2 should be the same and a power of 2. 
+func Prove(srs *kzg.SRS, t1, t2 []fr.Element) (Proof, error) { + + // res + var proof Proof + var err error + + // size checking + if len(t1) != len(t2) { + return proof, ErrIncompatibleSize + } + + // create the domains + d := fft.NewDomain(uint64(len(t1)), 1, false) + if d.Cardinality != uint64(len(t1)) { + return proof, ErrSize + } + s := int(d.Cardinality) + proof.size = s + + // hash function for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta") + + // commit t1, t2 + ct1 := make([]fr.Element, s) + ct2 := make([]fr.Element, s) + copy(ct1, t1) + copy(ct2, t2) + d.FFTInverse(ct1, fft.DIF, 0) + d.FFTInverse(ct2, fft.DIF, 0) + fft.BitReverse(ct1) + fft.BitReverse(ct2) + proof.t1, err = kzg.Commit(ct1, srs) + if err != nil { + return proof, err + } + proof.t2, err = kzg.Commit(ct2, srs) + if err != nil { + return proof, err + } + + // derive challenge for z + epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2) + if err != nil { + return proof, err + } + + // compute Z and commit it + cz := computeZ(t1, t2, epsilon) + d.FFTInverse(cz, fft.DIT, 0) + proof.z, err = kzg.Commit(cz, srs) + if err != nil { + return proof, err + } + lz := make([]fr.Element, s) + copy(lz, cz) + d.FFT(lz, fft.DIF, 1) + + // compute the first part of the numerator + lt1 := make([]fr.Element, s) + lt2 := make([]fr.Element, s) + copy(lt1, ct1) + copy(lt2, ct2) + d.FFT(lt1, fft.DIF, 1) + d.FFT(lt2, fft.DIF, 1) + h := computeH(lt1, lt2, lz, epsilon) + + // compute second part of the numerator + h0 := computeH0(lz, d) + + // derive challenge used for the folding + omega, err := deriveRandomness(&fs, "omega", &proof.z) + if err != nil { + return proof, err + } + + // fold the numerator and divide it by x^n-1 + var t fr.Element + t.SetUint64(2).Neg(&t).Inverse(&t) + for i := 0; i < s; i++ { + h0[i].Mul(&omega, &h0[i]). + Add(&h0[i], &h[i]). 
+ Mul(&h0[i], &t) + } + + // get the quotient and commit it + d.FFTInverse(h0, fft.DIT, 1) + proof.q, err = kzg.Commit(h0, srs) + if err != nil { + return proof, err + } + + // derive the evaluation challenge + eta, err := deriveRandomness(&fs, "eta", &proof.q) + if err != nil { + return proof, err + } + + // compute the opening proofs + proof.batchedProof, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ct1, + ct2, + cz, + h0, + }, + []kzg.Digest{ + proof.t1, + proof.t2, + proof.z, + proof.q, + }, + &eta, + hFunc, + d, + srs, + ) + if err != nil { + return proof, err + } + + eta.Mul(&eta, &d.Generator) + proof.shiftedProof, err = kzg.Open( + cz, + &eta, + d, + srs, + ) + if err != nil { + return proof, err + } + + // done + return proof, nil + +} + +// TODO put that in fiat-shamir package +func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bw6756.G1Affine) (fr.Element, error) { + + var buf [bw6756.SizeOfG1AffineUncompressed]byte + var r fr.Element + + for _, p := range points { + buf = p.RawBytes() + if err := fs.Bind(challenge, buf[:]); err != nil { + return r, err + } + } + + b, err := fs.ComputeChallenge(challenge) + if err != nil { + return r, err + } + r.SetBytes(b) + return r, nil +} + +// Verify verifies a permutation proof. +func Verify(srs *kzg.SRS, proof Proof) error { + + // hash function that is used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta") + + // derive the challenges + epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2) + if err != nil { + return err + } + + omega, err := deriveRandomness(&fs, "omega", &proof.z) + if err != nil { + return err + } + + eta, err := deriveRandomness(&fs, "eta", &proof.q) + if err != nil { + return err + } + + // check the relation + bs := big.NewInt(int64(proof.size)) + var l0, a, b, one, rhs, lhs fr.Element + one.SetOne() + rhs.Exp(eta, bs). 
+ Sub(&rhs, &one) + a.Sub(&eta, &one) + l0.Div(&rhs, &a) + rhs.Mul(&rhs, &proof.batchedProof.ClaimedValues[3]) + a.Sub(&epsilon, &proof.batchedProof.ClaimedValues[1]). + Mul(&a, &proof.shiftedProof.ClaimedValue) + b.Sub(&epsilon, &proof.batchedProof.ClaimedValues[0]). + Mul(&b, &proof.batchedProof.ClaimedValues[2]) + lhs.Sub(&a, &b) + a.Sub(&proof.batchedProof.ClaimedValues[2], &one). + Mul(&a, &l0). + Mul(&a, &omega) + lhs.Add(&a, &lhs) + if !lhs.Equal(&rhs) { + return ErrPermutationProof + } + + // check the opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.t1, + proof.t2, + proof.z, + proof.q, + }, + &proof.batchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = kzg.Verify(&proof.z, &proof.shiftedProof, srs) + if err != nil { + return err + } + + return nil +} diff --git a/ecc/bw6-756/fr/permutation/permutation_test.go b/ecc/bw6-756/fr/permutation/permutation_test.go new file mode 100644 index 000000000..9f56b94c6 --- /dev/null +++ b/ecc/bw6-756/fr/permutation/permutation_test.go @@ -0,0 +1,94 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package permutation + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" +) + +func TestProof(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + a := make([]fr.Element, 8) + b := make([]fr.Element, 8) + + for i := 0; i < 8; i++ { + a[i].SetUint64(uint64(4*i + 1)) + } + for i := 0; i < 8; i++ { + b[i].Set(&a[(5*i)%8]) + } + + // correct proof + { + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + a[0].SetRandom() + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkProver(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := make([]fr.Element, polySize) + c := make([]fr.Element, polySize) + + for i := 0; i < polySize; i++ { + a[i].SetUint64(uint64(i)) + } + for i := 0; i < polySize; i++ { + c[i].Set(&a[(5*i)%(polySize)]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Prove(srs, a, c) + } + +} diff --git a/ecc/bw6-756/fr/plookup/doc.go b/ecc/bw6-756/fr/plookup/doc.go new file mode 100644 index 000000000..ec4b91287 --- /dev/null +++ b/ecc/bw6-756/fr/plookup/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package plookup provides an API to build plookup proofs. +package plookup diff --git a/ecc/bw6-756/fr/plookup/plookup_test.go b/ecc/bw6-756/fr/plookup/plookup_test.go new file mode 100644 index 000000000..5f8ed24ff --- /dev/null +++ b/ecc/bw6-756/fr/plookup/plookup_test.go @@ -0,0 +1,139 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" +) + +func TestLookupVector(t *testing.T) { + + lookupVector := make(Table, 8) + fvector := make(Table, 7) + for i := 0; i < 8; i++ { + lookupVector[i].SetUint64(uint64(2 * i)) + } + for i := 0; i < 7; i++ { + fvector[i].Set(&lookupVector[(4*i+1)%8]) + } + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + // correct proof vector + { + proof, err := ProveLookupVector(srs, fvector, lookupVector) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupVector(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proofs vector + { + fvector[0].SetRandom() + + proof, err := ProveLookupVector(srs, fvector, lookupVector) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupVector(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func TestLookupTable(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + lookupTable := make([]Table, 3) + fTable := make([]Table, 3) + for i := 0; i < 3; i++ { + lookupTable[i] = make(Table, 8) + fTable[i] = make(Table, 7) + for j := 0; j < 8; j++ { + lookupTable[i][j].SetUint64(uint64(2*i + j)) + } + for j := 0; j < 7; j++ { + fTable[i][j].Set(&lookupTable[i][(4*j+1)%8]) + } + } + + // correct proof + { + proof, err := ProveLookupTables(srs, fTable, lookupTable) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupTables(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + fTable[0][0].SetRandom() + proof, err := ProveLookupTables(srs, fTable, lookupTable) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupTables(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkPlookup(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := 
make(Table, polySize) + c := make(Table, polySize) + + for i := 0; i < 1<<14; i++ { + a[i].SetUint64(uint64(i)) + c[i].SetUint64(uint64((8 * i) % polySize)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ProveLookupVector(srs, a, c) + } +} diff --git a/ecc/bw6-756/fr/plookup/table.go b/ecc/bw6-756/fr/plookup/table.go new file mode 100644 index 000000000..7bbca097c --- /dev/null +++ b/ecc/bw6-756/fr/plookup/table.go @@ -0,0 +1,252 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "sort" + + bw6756 "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/permutation" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrIncompatibleSize = errors.New("the tables in f and t are not of the same size") + ErrFoldedCommitment = errors.New("the folded commitment is malformed") + ErrNumberDigests = errors.New("proof.ts and proof.fs are not of the same length") +) + +// ProofLookupTables proofs that a list of tables +type ProofLookupTables struct { + + // commitments to the rows f + fs []kzg.Digest + + // commitments to the rows of t + ts []kzg.Digest + + // lookup proof for the f and t folded + foldedProof ProofLookupVector + + // proof that the ts folded correspond to t in the folded proof + permutationProof permutation.Proof +} + +// ProveLookupTables generates a proof that f, seen as a multi dimensional table, +// consists of vectors that are in t. In other words for each i, f[:][i] must be one +// of the t[:][j]. +// +// For instance, if t is the truth table of the XOR function, t will be populated such +// that t[:][i] contains the i-th entry of the truth table, so t[0][i] XOR t[1][i] = t[2][i]. +// +// The Table in f and t are supposed to be of the same size constant size. 
+func ProveLookupTables(srs *kzg.SRS, f, t []Table) (ProofLookupTables, error) { + + // res + proof := ProofLookupTables{} + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check the sizes + if len(f) != len(t) { + return proof, ErrIncompatibleSize + } + s := len(f[0]) + for i := 1; i < len(f); i++ { + if len(f[i]) != s { + return proof, ErrIncompatibleSize + } + } + s = len(t[0]) + for i := 1; i < len(t); i++ { + if len(t[i]) != s { + return proof, ErrIncompatibleSize + } + } + + // commit to the tables in f and t + nbRows := len(t) + proof.fs = make([]kzg.Digest, nbRows) + proof.ts = make([]kzg.Digest, nbRows) + _nbColumns := len(f[0]) + 1 + if _nbColumns < len(t[0]) { + _nbColumns = len(t[0]) + } + d := fft.NewDomain(uint64(_nbColumns), 0, false) + nbColumns := d.Cardinality + lfs := make([][]fr.Element, nbRows) + cfs := make([][]fr.Element, nbRows) + lts := make([][]fr.Element, nbRows) + cts := make([][]fr.Element, nbRows) + + for i := 0; i < nbRows; i++ { + + cfs[i] = make([]fr.Element, nbColumns) + lfs[i] = make([]fr.Element, nbColumns) + copy(cfs[i], f[i]) + copy(lfs[i], f[i]) + for j := len(f[i]); j < int(nbColumns); j++ { + cfs[i][j] = f[i][len(f[i])-1] + lfs[i][j] = f[i][len(f[i])-1] + } + d.FFTInverse(cfs[i], fft.DIF, 0) + fft.BitReverse(cfs[i]) + proof.fs[i], err = kzg.Commit(cfs[i], srs) + if err != nil { + return proof, err + } + + cts[i] = make([]fr.Element, nbColumns) + lts[i] = make([]fr.Element, nbColumns) + copy(cts[i], t[i]) + copy(lts[i], t[i]) + for j := len(t[i]); j < int(d.Cardinality); j++ { + cts[i][j] = t[i][len(t[i])-1] + lts[i][j] = t[i][len(t[i])-1] + } + d.FFTInverse(cts[i], fft.DIF, 0) + fft.BitReverse(cts[i]) + proof.ts[i], err = kzg.Commit(cts[i], srs) + if err != nil { + return proof, err + } + } + + // fold f and t + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = 
new(kzg.Digest) + comms[i].Set(&proof.fs[i]) + comms[nbRows+i] = new(kzg.Digest) + comms[nbRows+i].Set(&proof.ts[i]) + } + lambda, err := deriveRandomness(&fs, "lambda", comms...) + if err != nil { + return proof, err + } + foldedf := make(Table, nbColumns) + foldedt := make(Table, nbColumns) + for i := 0; i < int(nbColumns); i++ { + for j := nbRows - 1; j >= 0; j-- { + foldedf[i].Mul(&foldedf[i], &lambda). + Add(&foldedf[i], &lfs[j][i]) + foldedt[i].Mul(&foldedt[i], &lambda). + Add(&foldedt[i], <s[j][i]) + } + } + + // generate a proof of permutation of the foldedt and sort(foldedt) + foldedtSorted := make(Table, nbColumns) + copy(foldedtSorted, foldedt) + sort.Sort(foldedtSorted) + proof.permutationProof, err = permutation.Prove(srs, foldedt, foldedtSorted) + if err != nil { + return proof, err + } + + // call plookupVector, on foldedf[:len(foldedf)-1] to ensure that the domain size + // in ProveLookupVector is the same as d's + proof.foldedProof, err = ProveLookupVector(srs, foldedf[:len(foldedf)-1], foldedt) + + return proof, err +} + +// VerifyLookupTables verifies that a ProofLookupTables proof is correct. +func VerifyLookupTables(srs *kzg.SRS, proof ProofLookupTables) error { + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check that the number of digests is the same + if len(proof.fs) != len(proof.ts) { + return ErrNumberDigests + } + + // fold the commitments fs and ts + nbRows := len(proof.fs) + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = &proof.fs[i] + comms[i+nbRows] = &proof.ts[i] + } + lambda, err := deriveRandomness(&fs, "lambda", comms...) 
+ if err != nil { + return err + } + + // fold the commitments of the rows of t and f + var comf, comt kzg.Digest + comf.Set(&proof.fs[nbRows-1]) + comt.Set(&proof.ts[nbRows-1]) + var blambda big.Int + lambda.ToBigIntRegular(&blambda) + for i := nbRows - 2; i >= 0; i-- { + comf.ScalarMultiplication(&comf, &blambda). + Add(&comf, &proof.fs[i]) + comt.ScalarMultiplication(&comt, &blambda). + Add(&comt, &proof.ts[i]) + } + + // check that the folded commitment of the fs correspond to foldedProof.f + if !comf.Equal(&proof.foldedProof.f) { + return ErrFoldedCommitment + } + + // check that the folded commitment of the ts is a permutation of proof.FoldedProof.t + err = permutation.Verify(srs, proof.permutationProof) + if err != nil { + return err + } + + // verify the inner proof + return VerifyLookupVector(srs, proof.foldedProof) +} + +// TODO put that in fiat-shamir package +func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bw6756.G1Affine) (fr.Element, error) { + + var buf [bw6756.SizeOfG1AffineUncompressed]byte + var r fr.Element + + for _, p := range points { + buf = p.RawBytes() + if err := fs.Bind(challenge, buf[:]); err != nil { + return r, err + } + } + + b, err := fs.ComputeChallenge(challenge) + if err != nil { + return r, err + } + r.SetBytes(b) + return r, nil +} diff --git a/ecc/bw6-756/fr/plookup/vector.go b/ecc/bw6-756/fr/plookup/vector.go new file mode 100644 index 000000000..4d64b9b49 --- /dev/null +++ b/ecc/bw6-756/fr/plookup/vector.go @@ -0,0 +1,687 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "math/bits" + "sort" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrNotInTable = errors.New("some value in the vector is not in the lookup table") + ErrPlookupVerification = errors.New("plookup verification failed") +) + +type Table []fr.Element + +// Len is the number of elements in the collection. +func (t Table) Len() int { + return len(t) +} + +// Less reports whether the element with +// index i should sort before the element with index j. +func (t Table) Less(i, j int) bool { + return t[i].Cmp(&t[j]) == -1 +} + +// Swap swaps the elements with indexes i and j. +func (t Table) Swap(i, j int) { + t[i], t[j] = t[j], t[i] +} + +// Proof Plookup proof, containing opening proofs +type ProofLookupVector struct { + + // size of the system + size uint64 + + // Commitments to h1, h2, t, z, f, h + h1, h2, t, z, f, h kzg.Digest + + // Batch opening proof of h1, h2, z, t + BatchedProof kzg.BatchOpeningProof + + // Batch opening proof of h1, h2, z shifted by g + BatchedProofShifted kzg.BatchOpeningProof +} + +// computeZ computes Z, in Lagrange basis. 
Z is the accumulation of the partial +// ratios of 2 fully split polynomials (cf https://eprint.iacr.org/2020/315.pdf) +// * lf is the list of values that should be in lt +// * lt is the lookup table +// * lh1, lh2 is lf sorted by lt split in 2 overlapping slices +// * beta, gamma are challenges (Schwartz-zippel: they are the random evaluations point) +func computeZ(lf, lt, lh1, lh2 []fr.Element, beta, gamma fr.Element) []fr.Element { + + z := make([]fr.Element, len(lt)) + + n := len(lt) + d := make([]fr.Element, n-1) + var u, c fr.Element + c.SetOne(). + Add(&c, &beta). + Mul(&c, &gamma) + for i := 0; i < n-1; i++ { + + d[i].Mul(&beta, &lh1[i+1]). + Add(&d[i], &lh1[i]). + Add(&d[i], &c) + + u.Mul(&beta, &lh2[i+1]). + Add(&u, &lh2[i]). + Add(&u, &c) + + d[i].Mul(&d[i], &u) + } + d = fr.BatchInvert(d) + + z[0].SetOne() + var a, b, e fr.Element + e.SetOne().Add(&e, &beta) + for i := 0; i < n-1; i++ { + + a.Add(&gamma, &lf[i]) + + b.Mul(&beta, <[i+1]). + Add(&b, <[i]). + Add(&b, &c) + + a.Mul(&a, &b). + Mul(&a, &e) + + z[i+1].Mul(&z[i], &a). + Mul(&z[i+1], &d[i]) + } + + return z +} + +// computeH computes the evaluation (shifted, bit reversed) of h where +// h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) - +// (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) ) +// +// * cz, ch1, ch2, ct, cf are the polynomials z, h1, h2, t, f in canonical basis +// * _lz, _lh1, _lh2, _lt, _lf are the polynomials z, h1, h2, t, f in shifted Lagrange basis (domainH) +// * beta, gamma are the challenges +// * it returns h in canonical basis +func computeH(_lz, _lh1, _lh2, _lt, _lf []fr.Element, beta, gamma fr.Element, domainH *fft.Domain) []fr.Element { + + // result + s := int(domainH.Cardinality) + num := make([]fr.Element, domainH.Cardinality) + + var u, v, w, _g, m, n, one, t fr.Element + t.SetUint64(2). + Inverse(&t) + _g.Square(&domainH.Generator). 
+ Exp(_g, big.NewInt(int64(s/2-1))) + one.SetOne() + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + g := make([]fr.Element, s) + g[0].Set(&domainH.FinerGenerator) + for i := 1; i < s; i++ { + g[i].Mul(&g[i-1], &domainH.Generator) + } + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + // m = (x-g**(n-1))*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) + m.Mul(&v, &_lz[_i]) + u.Add(&gamma, &_lf[_i]) + m.Mul(&m, &u) + u.Mul(&beta, &_lt[_is]). + Add(&u, &_lt[_i]). + Add(&u, &w) + m.Mul(&m, &u) + + // n = (x-g**(n-1))*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) + n.Mul(&beta, &_lh1[_is]). + Add(&n, &_lh1[_i]). + Add(&n, &w) + u.Mul(&beta, &_lh2[_is]). + Add(&u, &_lh2[_i]). + Add(&u, &w) + n.Mul(&n, &u). + Mul(&n, &_lz[_is]) + + num[_i].Sub(&m, &n) + u.Sub(&g[i], &_g) + num[_i].Mul(&num[_i], &u) + + } + + return num +} + +// computeH0 returns l0 * (z-1), in Lagrange basis and bit reversed order +func computeH0(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var d fr.Element + d.Set(&domainH.FinerGenerator) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(den); i++ { + den[i].Sub(&d, &one) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). 
+ Mul(&res[_i], &g[i%2]).Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHn returns ln * (z-1), in Lagrange basis and bit reversed order +func computeHn(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + one.SetOne() + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(lzCosetReversed); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). + Mul(&res[_i], &g[i%2]). 
+ Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHh1h2 returns ln * (h1 - h2(g.x)), in Lagrange basis and bit reversed order +func computeHh1h2(_lh1, _lh2 []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(_lh1)) + for i := 0; i < len(_lh1); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(_lh1)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + s := len(_lh1) + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + res[_i].Sub(&_lh1[_i], &_lh2[_is]). + Mul(&res[_i], &g[i%2]). + Mul(&res[_i], &den[i]) + } + + return res +} + +// computeQuotient computes the full quotient of the plookup protocol. +// * alpha is the challenge to fold the numerator +// * lh, lh0, lhn, lh1h2 are the various pieces of the numerator (Lagrange shifted form, bit reversed order) +// * domainH fft domain +// It returns the quotient, in canonical basis +func computeQuotient(alpha fr.Element, lh, lh0, lhn, lh1h2 []fr.Element, domainH *fft.Domain) []fr.Element { + + s := len(lh) + res := make([]fr.Element, s) + + var one fr.Element + one.SetOne() + + var d [2]fr.Element + d[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality>>1))) + d[1].Neg(&d[0]) + d[0].Sub(&d[0], &one).Inverse(&d[0]) + d[1].Sub(&d[1], &one).Inverse(&d[1]) + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + + res[_i].Mul(&lh1h2[_i], &alpha). + Add(&res[_i], &lhn[_i]). 
+ Mul(&res[_i], &alpha). + Add(&res[_i], &lh0[_i]). + Mul(&res[_i], &alpha). + Add(&res[_i], &lh[_i]). + Mul(&res[_i], &d[i%2]) + } + + domainH.FFTInverse(res, fft.DIT, 1) + + return res +} + +// ProveLookupVector returns proof that the values in f are in t. +// +// /!\IMPORTANT/!\ +// +// If the table t is already commited somewhere (which is the normal workflow +// before generating a lookup proof), the commitment needs to be done on the +// table sorted. Otherwise the commitment in proof.t will not be the same as +// the public commitment: it will contain the same values, but permuted. +// +func ProveLookupVector(srs *kzg.SRS, f, t Table) (ProofLookupVector, error) { + + // res + var proof ProofLookupVector + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // create domains + var dNum *fft.Domain + if len(t) <= len(f) { + dNum = fft.NewDomain(uint64(len(f)+1), 0, false) + } else { + dNum = fft.NewDomain(uint64(len(t)), 0, false) + } + cardDNum := int(dNum.Cardinality) + + // set the size + proof.size = dNum.Cardinality + + // resize f and t + // note: the last element of lf does not matter + lf := make([]fr.Element, cardDNum) + lt := make([]fr.Element, cardDNum) + cf := make([]fr.Element, cardDNum) + ct := make([]fr.Element, cardDNum) + copy(lt, t) + copy(lf, f) + for i := len(f); i < cardDNum; i++ { + lf[i] = f[len(f)-1] + } + for i := len(t); i < cardDNum; i++ { + lt[i] = t[len(t)-1] + } + sort.Sort(Table(lt)) + copy(ct, lt) + copy(cf, lf) + dNum.FFTInverse(ct, fft.DIF, 0) + dNum.FFTInverse(cf, fft.DIF, 0) + fft.BitReverse(ct) + fft.BitReverse(cf) + proof.t, err = kzg.Commit(ct, srs) + if err != nil { + return proof, err + } + proof.f, err = kzg.Commit(cf, srs) + if err != nil { + return proof, err + } + + // write f sorted by t + lfSortedByt := make(Table, 2*dNum.Cardinality-1) + copy(lfSortedByt, lt) + 
copy(lfSortedByt[dNum.Cardinality:], lf) + sort.Sort(lfSortedByt) + + // compute h1, h2, commit to them + lh1 := make([]fr.Element, cardDNum) + lh2 := make([]fr.Element, cardDNum) + ch1 := make([]fr.Element, cardDNum) + ch2 := make([]fr.Element, cardDNum) + copy(lh1, lfSortedByt[:cardDNum]) + copy(lh2, lfSortedByt[cardDNum-1:]) + + copy(ch1, lfSortedByt[:cardDNum]) + copy(ch2, lfSortedByt[cardDNum-1:]) + dNum.FFTInverse(ch1, fft.DIF, 0) + dNum.FFTInverse(ch2, fft.DIF, 0) + fft.BitReverse(ch1) + fft.BitReverse(ch2) + + proof.h1, err = kzg.Commit(ch1, srs) + if err != nil { + return proof, err + } + proof.h2, err = kzg.Commit(ch2, srs) + if err != nil { + return proof, err + } + + // derive beta, gamma + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return proof, err + } + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return proof, err + } + + // Compute to Z + lz := computeZ(lf, lt, lh1, lh2, beta, gamma) + cz := make([]fr.Element, len(lz)) + copy(cz, lz) + dNum.FFTInverse(cz, fft.DIF, 0) + fft.BitReverse(cz) + proof.z, err = kzg.Commit(cz, srs) + if err != nil { + return proof, err + } + + // prepare data for computing the quotient + // compute the numerator + s := dNum.Cardinality + domainH := fft.NewDomain(uint64(2*s), 1, false) + _lz := make([]fr.Element, 2*s) + _lh1 := make([]fr.Element, 2*s) + _lh2 := make([]fr.Element, 2*s) + _lt := make([]fr.Element, 2*s) + _lf := make([]fr.Element, 2*s) + copy(_lz, cz) + copy(_lh1, ch1) + copy(_lh2, ch2) + copy(_lt, ct) + copy(_lf, cf) + domainH.FFT(_lz, fft.DIF, 1) + domainH.FFT(_lh1, fft.DIF, 1) + domainH.FFT(_lh2, fft.DIF, 1) + domainH.FFT(_lt, fft.DIF, 1) + domainH.FFT(_lf, fft.DIF, 1) + + // compute h + lh := computeH(_lz, _lh1, _lh2, _lt, _lf, beta, gamma, domainH) + + // compute h0 + lh0 := computeH0(_lz, domainH) + + // compute hn + lhn := computeHn(_lz, domainH) + + // compute hh1h2 + lh1h2 := computeHh1h2(_lh1, _lh2, domainH) + + // 
compute the quotient + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return proof, err + } + ch := computeQuotient(alpha, lh, lh0, lhn, lh1h2, domainH) + proof.h, err = kzg.Commit(ch, srs) + if err != nil { + return proof, err + } + + // build the opening proofs + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return proof, err + } + proof.BatchedProof, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + cf, + ch, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + nu.Mul(&nu, &dNum.Generator) + proof.BatchedProofShifted, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + return proof, nil +} + +// VerifyLookupVector verifies that a ProofLookupVector proof is correct +func VerifyLookupVector(srs *kzg.SRS, proof ProofLookupVector) error { + + // hash function that is used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // derive the various challenges + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return err + } + + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return err + } + + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return err + } + + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return err + } + + // check opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &proof.BatchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = 
kzg.BatchVerifySinglePoint(
+		[]kzg.Digest{
+			proof.h1,
+			proof.h2,
+			proof.t,
+			proof.z,
+		},
+		&proof.BatchedProofShifted,
+		hFunc,
+		srs,
+	)
+	if err != nil {
+		return err
+	}
+
+	// check polynomial relation using Schwartz Zippel
+	var lhs, rhs, nun, g, _g, a, v, w, one fr.Element
+	d := fft.NewDomain(proof.size, 0, false) // only there to access the root of unity
+	one.SetOne()
+	g.Exp(d.Generator, big.NewInt(int64(d.Cardinality-1)))
+
+	v.Add(&one, &beta)
+	w.Mul(&v, &gamma)
+
+	// h(nu) where
+	// h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) -
+	// (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) )
+	lhs.Sub(&nu, &g).
+		Mul(&lhs, &proof.BatchedProof.ClaimedValues[3]).
+		Mul(&lhs, &v)
+	a.Add(&gamma, &proof.BatchedProof.ClaimedValues[4])
+	lhs.Mul(&lhs, &a)
+	a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[2]).
+		Add(&a, &proof.BatchedProof.ClaimedValues[2]).
+		Add(&a, &w)
+	lhs.Mul(&lhs, &a)
+
+	rhs.Sub(&nu, &g).
+		Mul(&rhs, &proof.BatchedProofShifted.ClaimedValues[3])
+	a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[0]).
+		Add(&a, &proof.BatchedProof.ClaimedValues[0]).
+		Add(&a, &w)
+	rhs.Mul(&rhs, &a)
+	a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[1]).
+		Add(&a, &proof.BatchedProof.ClaimedValues[1]).
+		Add(&a, &w)
+	rhs.Mul(&rhs, &a)
+
+	lhs.Sub(&lhs, &rhs)
+
+	// check consistency of bounds
+	var l0, ln, d1, d2 fr.Element
+	l0.Exp(nu, big.NewInt(int64(d.Cardinality))).Sub(&l0, &one)
+	ln.Set(&l0)
+	d1.Sub(&nu, &one)
+	d2.Sub(&nu, &g)
+	l0.Div(&l0, &d1)
+	ln.Div(&ln, &d2)
+
+	// l0*(z-1)
+	var l0z fr.Element
+	l0z.Sub(&proof.BatchedProof.ClaimedValues[3], &one).
+		Mul(&l0z, &l0)
+
+	// ln*(z-1)
+	var lnz fr.Element
+	lnz.Sub(&proof.BatchedProof.ClaimedValues[3], &one).
+		Mul(&ln, &lnz)
+
+	// ln*(h1 - h2(g.x))
+	var lnh1h2 fr.Element
+	lnh1h2.Sub(&proof.BatchedProof.ClaimedValues[0], &proof.BatchedProofShifted.ClaimedValues[1]).
+ Mul(&lnh1h2, &ln) + + // fold the numerator + lnh1h2.Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lnz). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &l0z). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lhs) + + // (x**n-1) * h(x) evaluated at nu + nun.Exp(nu, big.NewInt(int64(d.Cardinality))) + _g.Sub(&nun, &one) + _g.Mul(&proof.BatchedProof.ClaimedValues[5], &_g) + if !lnh1h2.Equal(&_g) { + return ErrPlookupVerification + } + + return nil +} diff --git a/ecc/bw6-756/fr/polynomial/doc.go b/ecc/bw6-756/fr/polynomial/doc.go new file mode 100644 index 000000000..83479b058 --- /dev/null +++ b/ecc/bw6-756/fr/polynomial/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package polynomial provides polynomial methods and commitment schemes. +package polynomial diff --git a/ecc/bw6-756/fr/polynomial/polynomial.go b/ecc/bw6-756/fr/polynomial/polynomial.go new file mode 100644 index 000000000..7f90c5e66 --- /dev/null +++ b/ecc/bw6-756/fr/polynomial/polynomial.go @@ -0,0 +1,123 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// Polynomial polynomial represented by coefficients bn254 fr field. +type Polynomial []fr.Element + +// Degree returns the degree of the polynomial, which is the length of Data. +func (p *Polynomial) Degree() uint64 { + return uint64(len(*p) - 1) +} + +// Eval evaluates p at v +// returns a fr.Element +func (p *Polynomial) Eval(v *fr.Element) fr.Element { + + res := (*p)[len(*p)-1] + for i := len(*p) - 2; i >= 0; i-- { + res.Mul(&res, v) + res.Add(&res, &(*p)[i]) + } + + return res +} + +// Clone returns a copy of the polynomial +func (p *Polynomial) Clone() Polynomial { + _p := make(Polynomial, len(*p)) + copy(_p, *p) + return _p +} + +// AddConstantInPlace adds a constant to the polynomial, modifying p +func (p *Polynomial) AddConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Add(&(*p)[i], c) + } +} + +// SubConstantInPlace subs a constant to the polynomial, modifying p +func (p *Polynomial) SubConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Sub(&(*p)[i], c) + } +} + +// ScaleInPlace multiplies p by v, modifying p +func (p *Polynomial) ScaleInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Mul(&(*p)[i], c) + } +} + +// Add adds p1 to p2 +// This function allocates a new slice unless p == p1 or p == p2 +func (p *Polynomial) Add(p1, p2 Polynomial) *Polynomial { + + bigger := p1 + smaller := p2 + if len(bigger) < len(smaller) { 
+ bigger, smaller = smaller, bigger + } + + if len(*p) == len(bigger) && (&(*p)[0] == &bigger[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &smaller[i]) + } + return p + } + + if len(*p) == len(smaller) && (&(*p)[0] == &smaller[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &bigger[i]) + } + *p = append(*p, bigger[len(smaller):]...) + return p + } + + res := make(Polynomial, len(bigger)) + copy(res, bigger) + for i := 0; i < len(smaller); i++ { + res[i].Add(&res[i], &smaller[i]) + } + *p = res + return p +} + +// Equal checks equality between two polynomials +func (p *Polynomial) Equal(p1 Polynomial) bool { + if (*p == nil) != (p1 == nil) { + return false + } + + if len(*p) != len(p1) { + return false + } + + for i := range p1 { + if !(*p)[i].Equal(&p1[i]) { + return false + } + } + + return true +} diff --git a/ecc/bw6-756/fr/polynomial/polynomial_test.go b/ecc/bw6-756/fr/polynomial/polynomial_test.go new file mode 100644 index 000000000..9a1298763 --- /dev/null +++ b/ecc/bw6-756/fr/polynomial/polynomial_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +func TestPolynomialEval(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // random value + var point fr.Element + point.SetRandom() + + // compute manually f(val) + var expectedEval, one, den fr.Element + var expo big.Int + one.SetOne() + expo.SetUint64(20) + expectedEval.Exp(point, &expo). + Sub(&expectedEval, &one) + den.Sub(&point, &one) + expectedEval.Div(&expectedEval, &den) + + // compute purported evaluation + purportedEval := f.Eval(&point) + + // check + if !purportedEval.Equal(&expectedEval) { + t.Fatal("polynomial evaluation failed") + } +} + +func TestPolynomialAddConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to add + var c fr.Element + c.SetRandom() + + // add constant + f.AddConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Add(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("AddConstantInPlace failed") + } + } +} + +func TestPolynomialSubConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to sub + var c fr.Element + c.SetRandom() + + // sub constant + f.SubConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Sub(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("SubConstantInPlace failed") + } + } +} + +func TestPolynomialScaleInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to scale by + var c fr.Element + c.SetRandom() + + // scale by constant + 
f.ScaleInPlace(&c) + + // check + for i := 0; i < 20; i++ { + if !f[i].Equal(&c) { + t.Fatal("ScaleInPlace failed") + } + } + +} + +func TestPolynomialAdd(t *testing.T) { + + // build unbalanced polynomials + f1 := make(Polynomial, 20) + f1Backup := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f1[i].SetOne() + f1Backup[i].SetOne() + } + f2 := make(Polynomial, 10) + f2Backup := make(Polynomial, 10) + for i := 0; i < 10; i++ { + f2[i].SetOne() + f2Backup[i].SetOne() + } + + // expected result + var one, two fr.Element + one.SetOne() + two.Double(&one) + expectedSum := make(Polynomial, 20) + for i := 0; i < 10; i++ { + expectedSum[i].Set(&two) + } + for i := 10; i < 20; i++ { + expectedSum[i].Set(&one) + } + + // caller is empty + var g Polynomial + g.Add(f1, f2) + if !g.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // all operands are distincts + _f1 := f1.Clone() + _f1.Add(f1, f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // first operand = caller + _f1 = f1.Clone() + _f2 := f2.Clone() + _f1.Add(_f1, _f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } + + // second operand = caller + _f1 = f1.Clone() + _f2 = f2.Clone() + _f1.Add(_f2, _f1) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } +} diff --git a/ecc/bw6-756/fuzz.go b/ecc/bw6-756/fuzz.go new file mode 100644 index 000000000..f00846392 --- /dev/null +++ b/ecc/bw6-756/fuzz.go @@ 
-0,0 +1,76 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/mimc" + "math/big" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + // TODO separate in multiple FuzzXXX and update continuous fuzzer scripts + // else, we don't really benefits for fuzzer strategy. 
+ fr.Fuzz(data) + fp.Fuzz(data) + mimc.Fuzz(data) + + // fuzz pairing + r := bytes.NewReader(data) + var e1, e2 fr.Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + var r, r1, r2, r1r2, zero GT + var b1, b2, b1b2 big.Int + e1.ToBigIntRegular(&b1) + e2.ToBigIntRegular(&b2) + b1b2.Mul(&b1, &b2) + + var p1 G1Affine + var p2 G2Affine + + p1.ScalarMultiplication(&g1GenAff, &b1) + p2.ScalarMultiplication(&g2GenAff, &b2) + + r, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + r1, _ = Pair([]G1Affine{p1}, []G2Affine{g2GenAff}) + r2, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{p2}) + + r1r2.Exp(&r, b1b2) + r1.Exp(&r1, b2) + r2.Exp(&r2, b1) + + if !(r1r2.Equal(&r1) && r1r2.Equal(&r2) && !r.Equal(&zero)) { + panic("pairing bilinearity check failed") + } + } + + return fuzzNormal +} diff --git a/ecc/bw6-756/fuzz_test.go b/ecc/bw6-756/fuzz_test.go new file mode 100644 index 000000000..583d7dece --- /dev/null +++ b/ecc/bw6-756/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bw6-756/g1.go b/ecc/bw6-756/g1.go new file mode 100644 index 000000000..2a1f0a5bf --- /dev/null +++ b/ecc/bw6-756/g1.go @@ -0,0 +1,1081 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G1Affine point in affine coordinates +type G1Affine struct { + X, Y fp.Element +} + +// G1Jac is a point with fp.Element coordinates +type G1Jac struct { + X, Y, Z fp.Element +} + +// g1JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g1JacExtended struct { + X, Y, ZZ, ZZZ fp.Element +} + +// g1Proj point in projective coordinates +type g1Proj struct { + x, y, z fp.Element +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G1Affine) Set(a *G1Affine) *G1Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G1Affine) ScalarMultiplication(a *G1Affine, s *big.Int) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Add(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Sub(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G1Affine) Equal(a *G1Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G1Affine) Neg(a *G1Affine) *G1Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G1Affine) FromJacobian(p1 *G1Jac) *G1Affine { + + var a, b fp.Element + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G1Affine) String() string { + var x, y fp.Element + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G1Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G1Affine) IsOnCurve() bool { + var point G1Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G1Affine) IsInSubGroup() bool { + var _p G1Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G1Jac) Set(a *G1Jac) *G1Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G1Jac) Equal(a *G1Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G1Affine{} + _p.FromJacobian(p) + + _a := G1Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G1Jac) Neg(a *G1Jac) *G1Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G1Jac) SubAssign(a *G1Jac) *G1Jac { + var tmp G1Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G1Jac) AddAssign(a *G1Jac) *G1Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fp.Element + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G1Jac) AddMixed(a *G1Affine) *G1Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fp.Element + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) Double(q *G1Jac) *G1Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) DoubleAssign() *G1Jac { + + var XX, YY, YYYY, ZZ, S, M, T fp.Element + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) ScalarMultiplication(a *G1Jac, s *big.Int) *G1Jac { + return p.mulGLV(a, s) +} + +func (p *G1Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G1Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G1Jac) FromAffine(Q *G1Affine) *G1Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G1Jac) IsOnCurve() bool { + var left, right, tmp fp.Element + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// IsInSubGroup returns true if p is on the r-torsion, false otherwise. +// Z[r,0]+Z[-lambdaG1Affine, 1] is the kernel +// of (u,v)->u+lambdaG1Affinev mod r. Expressing r, lambdaG1Affine as +// polynomials in x, a short vector of this Zmodule is +// (x+1), (x**3-x**2+1). So we check that (x+1)p+(x**3-x**2+1)*phi(p) +// is the infinity. +func (p *G1Jac) IsInSubGroup() bool { + + var res, phip G1Jac + phip.phi(p) + res.ScalarMultiplication(&phip, &xGen). + SubAssign(&phip). + ScalarMultiplication(&res, &xGen). + ScalarMultiplication(&res, &xGen). 
+ AddAssign(&phip) + + phip.ScalarMultiplication(p, &xGen).AddAssign(p).AddAssign(&res) + + return phip.IsOnCurve() && phip.Z.IsZero() + +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G1Jac) mulWindowed(a *G1Jac, s *big.Int) *G1Jac { + + var res G1Jac + var ops [3]G1Jac + + res.Set(&g1Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G1Jac) phi(a *G1Jac) *G1Jac { + p.Set(a) + p.X.Mul(&p.X, &thirdRootOneG1) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { + + var table [15]G1Jac + var res G1Jac + var k1, k2 fr.Element + + res.Set(&g1Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + 
table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G1Affine) ClearCofactor(a *G1Affine) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in E(Fp) to E(Fp)[r] +func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { + + var L0, L1, uP, u2P, u3P, tmp G1Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + + L0.Set(a).AddAssign(&u3P). + SubAssign(&u2P) + tmp.Set(a).AddAssign(&u2P). + SubAssign(&uP). + SubAssign(&uP). + Double(&tmp) + L0.SubAssign(&tmp). + SubAssign(a) + + L1.Set(a).AddAssign(&uP) + tmp.Set(&uP).SubAssign(a). + Double(&tmp). + SubAssign(&u2P) + L1.AddAssign(&tmp). + SubAssign(a) + + p.phi(&L1). 
+ AddAssign(&L0) + + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g1JacExtended) Set(a *g1JacExtended) *g1JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g1JacExtended) setInfinity() *g1JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G1Affine) fromJacExtended(Q *g1JacExtended) *G1Affine { + if Q.ZZ.IsZero() { + p.X = fp.Element{} + p.Y = fp.Element{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G1Jac) fromJacExtended(Q *g1JacExtended) *G1Jac { + if Q.ZZ.IsZero() { + p.Set(&g1Infinity) + return p + } + p.X.Mul(&Q.ZZ, &Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G1Jac) unsafeFromJacExtended(Q *g1JacExtended) *G1Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g1JacExtended) add(q *g1JacExtended) *g1JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fp.Element + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if B.IsZero() { + return p.double(q) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var U1, U2, 
S1, S2, P, R, PP, PPP, Q, V fp.Element + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) double(q *g1JacExtended) *g1JacExtended { + var U, V, W, S, XX, M fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) subMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + 
return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) addMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g1JacExtended) doubleNegMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) doubleMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// ------------------------------------------------------------------------------------------------- +// Homogenous projective + +// Set sets p to the provided point +func (p *g1Proj) Set(a *g1Proj) *g1Proj { + p.x, p.y, p.z = a.x, a.y, a.z + return p +} + +// Neg computes -G +func (p *g1Proj) Neg(a *g1Proj) *g1Proj { + *p = *a + p.y.Neg(&a.y) + return p +} + +// FromJacobian converts a point from Jacobian to projective coordinates +func (p *g1Proj) FromJacobian(Q *G1Jac) *g1Proj { + var buf fp.Element + buf.Square(&Q.Z) + + p.x.Mul(&Q.X, &Q.Z) + p.y.Set(&Q.Y) + p.z.Mul(&Q.Z, &buf) + + return p +} + +// FromAffine sets p = Q, p in homogenous projective, Q in affine +func (p *g1Proj) FromAffine(Q *G1Affine) *g1Proj { + if Q.X.IsZero() && Q.Y.IsZero() { + p.z.SetZero() + p.x.SetOne() + p.y.SetOne() + return p + } + p.z.SetOne() + p.x.Set(&Q.X) + p.y.Set(&Q.Y) + return p +} + +// BatchProjectiveToAffineG1 converts points in Projective coordinates to Affine coordinates +// performing a single field inversion (Montgomery batch inversion trick) +// result must be allocated with len(result) == len(points) +func BatchProjectiveToAffineG1(points []g1Proj, result []G1Affine) { + zeroes := make([]bool, len(points)) + accumulator := fp.One() + + // batch invert all points[].Z coordinates with Montgomery batch inversion trick + // (stores points[].Z^-1 in result[i].X to avoid allocating a slice of fr.Elements) + for i := 0; i < len(points); i++ { + if points[i].z.IsZero() { + zeroes[i] = true + continue + } + result[i].X = accumulator + accumulator.Mul(&accumulator, &points[i].z) + } + + var accInverse fp.Element + accInverse.Inverse(&accumulator) + + for i := len(points) - 1; i >= 0; i-- { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + result[i].X.Mul(&result[i].X, &accInverse) + accInverse.Mul(&accInverse, &points[i].z) + } + + // batch convert to affine. 
+ parallel.Execute(len(points), func(start, end int) { + for i := start; i < end; i++ { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + a := result[i].X + result[i].X.Mul(&points[i].x, &a) + result[i].Y.Mul(&points[i].y, &a) + } + }) +} + +// BatchJacobianToAffineG1 converts points in Jacobian coordinates to Affine coordinates +// performing a single field inversion (Montgomery batch inversion trick) +// result must be allocated with len(result) == len(points) +func BatchJacobianToAffineG1(points []G1Jac, result []G1Affine) { + zeroes := make([]bool, len(points)) + accumulator := fp.One() + + // batch invert all points[].Z coordinates with Montgomery batch inversion trick + // (stores points[].Z^-1 in result[i].X to avoid allocating a slice of fr.Elements) + for i := 0; i < len(points); i++ { + if points[i].Z.IsZero() { + zeroes[i] = true + continue + } + result[i].X = accumulator + accumulator.Mul(&accumulator, &points[i].Z) + } + + var accInverse fp.Element + accInverse.Inverse(&accumulator) + + for i := len(points) - 1; i >= 0; i-- { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + result[i].X.Mul(&result[i].X, &accInverse) + accInverse.Mul(&accInverse, &points[i].Z) + } + + // batch convert to affine. + parallel.Execute(len(points), func(start, end int) { + for i := start; i < end; i++ { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + var a, b fp.Element + a = result[i].X + b.Square(&a) + result[i].X.Mul(&points[i].X, &b) + result[i].Y.Mul(&points[i].Y, &b). 
+ Mul(&result[i].Y, &a) + } + }) + +} + +// BatchScalarMultiplicationG1 multiplies the same base (generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G1Jac, (1 << (c - 1))) + baseTable[0].Set(&g1Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + // convert our base exp table into affine to 
use AddMixed + baseTableAff := make([]G1Affine, (1 << (c - 1))) + BatchJacobianToAffineG1(baseTable, baseTableAff) + toReturn := make([]G1Jac, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. + parallel.Execute(len(pScalars), func(start, end int) { + var p G1Jac + for i := start; i < end; i++ { + p.Set(&g1Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddMixed(&baseTableAff[bits-1]) + } else { + // sub + t := baseTableAff[bits & ^msbWindow] + t.Neg(&t) + p.AddMixed(&t) + } + } + + // set our result point + toReturn[i] = p + + } + }) + toReturnAff := make([]G1Affine, len(scalars)) + BatchJacobianToAffineG1(toReturn, toReturnAff) + return toReturnAff +} diff --git a/ecc/bw6-756/g1_test.go b/ecc/bw6-756/g1_test.go new file mode 100644 index 000000000..a38dbeb3a --- /dev/null +++ b/ecc/bw6-756/g1_test.go @@ -0,0 +1,664 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG1AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fp.Element) bool { + var p, res1, res2 G1Jac + p = fuzzJacobianG1Affine(&g1Gen, a) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenFp(), + )) + + properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fp.Element) bool { + var p, res, tmp G1Jac + p = fuzzJacobianG1Affine(&g1Gen, a) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Svsw mapping should output point on the curve", prop.ForAll( + func(a fp.Element) bool { + g := MapToCurveG1Svdw(a) + return g.IsInSubGroup() + }, + GenFp(), + )) + + properties.Property("[G1] Svsw mapping should be deterministic", prop.ForAll( + func(a fp.Element) bool { + g1 := MapToCurveG1Svdw(a) + g2 := MapToCurveG1Svdw(a) + return g1.Equal(&g2) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] g1Gen (affine) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G1Affine + op1.FromJacobian(&g1Gen) + op2.FromJacobian(&g1Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenFp(), + )) + + properties.Property("[BW6-756] g1Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2, op3 G1Jac + op1.Set(&g1Gen) + op3.Set(&g1Gen) + + op2 = fuzzJacobianG1Affine(&g1Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + g := 
fuzzJacobianG1Affine(&g1Gen, a) + var op1 G1Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + var g g1JacExtended + g.X.Set(&g1Gen.X) + g.Y.Set(&g1Gen.Y) + g.ZZ.Set(&g1Gen.Z) + g.ZZZ.Set(&g1Gen.Z) + gfuzz := fuzzExtendedJacobianG1Affine(&g, a) + + var op1 G1Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fp.Element) bool { + var g G1Jac + var op1 G1Affine + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + + var one fp.Element + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g1Gen.X) && g.Y.Equal(&g1Gen.Y) && g.Z.Equal(&one) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G1Jac + op1.FromAffine(&g) + var one, zero fp.Element + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G1Affine + var op1 g1JacExtended + var zero fp.Element + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Jac + var op1 g1JacExtended + var zero, one fp.Element + one.SetOne() + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) + return 
g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fp.Element) bool { + op1 := fuzzJacobianG1Affine(&g1Gen, a) + op2 := fuzzJacobianG1Affine(&g1Gen, b) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BW6-756] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + var op1, op2 G1Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.AddAssign(&g1Infinity) + var op2 G1Jac + op2.Set(&g1Infinity) + op2.AddAssign(&g1Gen) + return fop1.Equal(&g1Gen) && op2.Equal(&g1Gen) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + o1.addMixed(&p1Neg) + 
o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.Neg(&fop1) + var op2 G1Affine + op2.FromJacobian(&g1Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + )) + + properties.Property("[BW6-756] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g1Gen, &rminusone) + gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g1Gen, &scalar) + op2.mulWindowed(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.ScalarMultiplication(&g1Gen, &rminusone) + 
gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g1Gen, &scalar) + op2.ScalarMultiplication(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G1Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g1Gen, &r) + op2.mulGLV(&g1Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g1Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fp.Element + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + for x.Legendre() != 1 { + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + } + + b.Sqrt(&x) + var point, pointCleared, infinity G1Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g1Infinity) + return point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG1AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BW6-756] BatchScalarMultiplication should be consistant with individual scalar multiplications", prop.ForAll( + 
func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G1Jac + var expected G1Affine + var b big.Int + expectedJac.mulGLV(&g1Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG1JacIsInSubGroup(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG1AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG1JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G1Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g1Gen, &scalar) + } + }) + + var glv G1Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g1Gen, &scalar) + } + }) + +} + +func BenchmarkG1AffineCofactorClearing(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG1JacAdd(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g1Gen) + } +} + +func BenchmarkG1JacAddMixed(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG1JacDouble(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG1JacExtAddMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG1JacExtSubMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + 
c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleNegMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG1JacExtAdd(b *testing.B) { + var a, c g1JacExtended + a.doubleMixed(&g1GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG1JacExtDouble(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG1Affine(p *G1Jac, f fp.Element) G1Jac { + var res G1Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG1Affine(p *g1JacExtended, f fp.Element) g1JacExtended { + var res g1JacExtended + var ff, fff fp.Element + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bw6-756/g2.go b/ecc/bw6-756/g2.go new file mode 100644 index 000000000..934f7d6f1 --- /dev/null +++ b/ecc/bw6-756/g2.go @@ -0,0 +1,933 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G2Affine point in affine coordinates +type G2Affine struct { + X, Y fp.Element +} + +// G2Jac is a point with fp.Element coordinates +type G2Jac struct { + X, Y, Z fp.Element +} + +// g2JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g2JacExtended struct { + X, Y, ZZ, ZZZ fp.Element +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G2Affine) Set(a *G2Affine) *G2Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G2Affine) ScalarMultiplication(a *G2Affine, s *big.Int) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Add(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Sub(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G2Affine) Equal(a *G2Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G2Affine) Neg(a *G2Affine) *G2Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G2Affine) FromJacobian(p1 *G2Jac) *G2Affine { + + var a, b fp.Element + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G2Affine) String() string { + var x, y fp.Element + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G2Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Affine) IsOnCurve() bool { + var point G2Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G2Affine) IsInSubGroup() bool { + var _p G2Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G2Jac) Set(a *G2Jac) *G2Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G2Jac) Equal(a *G2Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G2Affine{} + _p.FromJacobian(p) + + _a := G2Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G2Jac) Neg(a *G2Jac) *G2Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G2Jac) SubAssign(a *G2Jac) *G2Jac { + var tmp G2Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G2Jac) AddAssign(a *G2Jac) *G2Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fp.Element + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G2Jac) AddMixed(a *G2Affine) *G2Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fp.Element + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) Double(q *G2Jac) *G2Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) DoubleAssign() *G2Jac { + + var XX, YY, YYYY, ZZ, S, M, T fp.Element + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) ScalarMultiplication(a *G2Jac, s *big.Int) *G2Jac { + return p.mulGLV(a, s) +} + +func (p *G2Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G2Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G2Jac) FromAffine(Q *G2Affine) *G2Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Jac) IsOnCurve() bool { + var left, right, tmp fp.Element + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bTwistCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// IsInSubGroup returns true if p is on the r-torsion, false otherwise. +// Z[r,0]+Z[-lambdaG2Affine, 1] is the kernel +// of (u,v)->u+lambdaG2Affinev mod r. Expressing r, lambdaG2Affine as +// polynomials in x, a short vector of this Zmodule is +// (x+1), (x**3-x**2+1). So we check that (x+1)p+(x**3-x**2+1)*phi(p) +// is the infinity. +func (p *G2Jac) IsInSubGroup() bool { + + var res, phip G2Jac + phip.phi(p) + res.ScalarMultiplication(&phip, &xGen). + SubAssign(&phip). + ScalarMultiplication(&res, &xGen). + ScalarMultiplication(&res, &xGen). 
+ AddAssign(&phip) + + phip.ScalarMultiplication(p, &xGen).AddAssign(p).AddAssign(&res) + + return phip.IsOnCurve() && phip.Z.IsZero() + +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G2Jac) mulWindowed(a *G2Jac, s *big.Int) *G2Jac { + + var res G2Jac + var ops [3]G2Jac + + res.Set(&g2Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G2Jac) phi(a *G2Jac) *G2Jac { + p.Set(a) + p.X.Mul(&p.X, &thirdRootOneG2) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { + + var table [15]G2Jac + var res G2Jac + var k1, k2 fr.Element + + res.Set(&g2Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + 
table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Affine) ClearCofactor(a *G2Affine) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { + + var L0, L1, uP, u2P, u3P, tmp G2Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + // ht=-2, hy=0 + // d1=1, d2=-1, d3=-1 + + L0.Set(a). + AddAssign(&u2P). + SubAssign(&uP) + tmp.Set(&u2P). + AddAssign(a). + SubAssign(&uP). + Double(&tmp) + L1.Set(&u3P). + SubAssign(&tmp) + + p.phi(&L0). 
+ AddAssign(&L1) + + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g2JacExtended) Set(a *g2JacExtended) *g2JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g2JacExtended) setInfinity() *g2JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G2Affine) fromJacExtended(Q *g2JacExtended) *G2Affine { + if Q.ZZ.IsZero() { + p.X = fp.Element{} + p.Y = fp.Element{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G2Jac) fromJacExtended(Q *g2JacExtended) *G2Jac { + if Q.ZZ.IsZero() { + p.Set(&g2Infinity) + return p + } + p.X.Mul(&Q.ZZ, &Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G2Jac) unsafeFromJacExtended(Q *g2JacExtended) *G2Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g2JacExtended) add(q *g2JacExtended) *g2JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fp.Element + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if B.IsZero() { + return p.double(q) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var U1, U2, 
S1, S2, P, R, PP, PPP, Q, V fp.Element + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) double(q *g2JacExtended) *g2JacExtended { + var U, V, W, S, XX, M fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) subMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + 
return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) addMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g2JacExtended) doubleNegMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) doubleMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// BatchScalarMultiplicationG2 multiplies the same base (generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG2(base *G2Affine, scalars []fr.Element) []G2Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G2Jac, (1 << (c - 1))) + baseTable[0].Set(&g2Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + toReturn := 
make([]G2Affine, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. + parallel.Execute(len(pScalars), func(start, end int) { + var p G2Jac + for i := start; i < end; i++ { + p.Set(&g2Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddAssign(&baseTable[bits-1]) + } else { + // sub + t := baseTable[bits & ^msbWindow] + t.Neg(&t) + p.AddAssign(&t) + } + } + + // set our result point + toReturn[i].FromJacobian(&p) + + } + }) + return toReturn +} diff --git a/ecc/bw6-756/g2_test.go b/ecc/bw6-756/g2_test.go new file mode 100644 index 000000000..c2fe39936 --- /dev/null +++ b/ecc/bw6-756/g2_test.go @@ -0,0 +1,664 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG2AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fp.Element) bool { + var p, res1, res2 G2Jac + p = fuzzJacobianG2Affine(&g2Gen, a) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenFp(), + )) + + properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fp.Element) bool { + var p, res, tmp G2Jac + p = fuzzJacobianG2Affine(&g2Gen, a) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Svsw mapping should output point on the curve", prop.ForAll( + func(a fp.Element) bool { + g := MapToCurveG2Svdw(a) + return g.IsInSubGroup() + }, + GenFp(), + )) + + properties.Property("[G2] Svsw mapping should be deterministic", prop.ForAll( + func(a fp.Element) bool { + g1 := MapToCurveG2Svdw(a) + g2 := MapToCurveG2Svdw(a) + return g1.Equal(&g2) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] g2Gen (affine) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G2Affine + op1.FromJacobian(&g2Gen) + op2.FromJacobian(&g2Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenFp(), + )) + + properties.Property("[BW6-756] g2Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2, op3 G2Jac + op1.Set(&g2Gen) + op3.Set(&g2Gen) + + op2 = fuzzJacobianG2Affine(&g2Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + g := 
fuzzJacobianG2Affine(&g2Gen, a) + var op1 G2Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + var g g2JacExtended + g.X.Set(&g2Gen.X) + g.Y.Set(&g2Gen.Y) + g.ZZ.Set(&g2Gen.Z) + g.ZZZ.Set(&g2Gen.Z) + gfuzz := fuzzExtendedJacobianG2Affine(&g, a) + + var op1 G2Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fp.Element) bool { + var g G2Jac + var op1 G2Affine + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + + var one fp.Element + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g2Gen.X) && g.Y.Equal(&g2Gen.Y) && g.Z.Equal(&one) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G2Jac + op1.FromAffine(&g) + var one, zero fp.Element + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G2Affine + var op1 g2JacExtended + var zero fp.Element + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Jac + var op1 g2JacExtended + var zero, one fp.Element + one.SetOne() + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return 
g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fp.Element) bool { + op1 := fuzzJacobianG2Affine(&g2Gen, a) + op2 := fuzzJacobianG2Affine(&g2Gen, b) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BW6-756] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop2 := fuzzJacobianG2Affine(&g2Gen, b) + var op1, op2 G2Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop2 := fuzzJacobianG2Affine(&g2Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g2Infinity) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop1.AddAssign(&g2Infinity) + var op2 G2Jac + op2.Set(&g2Infinity) + op2.AddAssign(&g2Gen) + return fop1.Equal(&g2Gen) && op2.Equal(&g2Gen) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + var p1, p1Neg G2Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g2JacExtended + o1.addMixed(&p1Neg) + 
o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + var p1, p1Neg G2Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g2JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop1.Neg(&fop1) + var op2 G2Affine + op2.FromJacobian(&g2Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g2Infinity) + }, + GenFp(), + )) + + properties.Property("[BW6-756] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G2Jac + g.mulGLV(&g2Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G2Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g2Gen, &rminusone) + gneg.Neg(&g2Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g2Gen, &scalar) + op2.mulWindowed(&g2Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G2Jac + g.mulGLV(&g2Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G2Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.ScalarMultiplication(&g2Gen, &rminusone) + 
gneg.Neg(&g2Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g2Gen, &scalar) + op2.ScalarMultiplication(&g2Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G2Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g2Gen, &r) + op2.mulGLV(&g2Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g2Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fp.Element + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff) + + for x.Legendre() != 1 { + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff) + + } + + b.Sqrt(&x) + var point, pointCleared, infinity G2Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g2Infinity) + return point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG2AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BW6-756] BatchScalarMultiplication should be consistant with individual scalar multiplications", prop.ForAll( + 
func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G2Jac + var expected G2Affine + var b big.Int + expectedJac.mulGLV(&g2Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG2JacIsInSubGroup(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG2AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG2JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G2Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g2Gen, &scalar) + } + }) + + var glv G2Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g2Gen, &scalar) + } + }) + +} + +func BenchmarkG2AffineCofactorClearing(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG2JacAdd(b *testing.B) { + var a G2Jac + a.Double(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g2Gen) + } +} + +func BenchmarkG2JacAddMixed(b *testing.B) { + var a G2Jac + a.Double(&g2Gen) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG2JacDouble(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG2JacExtAddMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG2JacExtSubMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG2JacExtDoubleMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + 
c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG2JacExtDoubleNegMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG2JacExtAdd(b *testing.B) { + var a, c g2JacExtended + a.doubleMixed(&g2GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG2JacExtDouble(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG2Affine(p *G2Jac, f fp.Element) G2Jac { + var res G2Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG2Affine(p *g2JacExtended, f fp.Element) g2JacExtended { + var res g2JacExtended + var ff, fff fp.Element + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bw6-756/hash_to_curve.go b/ecc/bw6-756/hash_to_curve.go new file mode 100644 index 000000000..f98291369 --- /dev/null +++ b/ecc/bw6-756/hash_to_curve.go @@ -0,0 +1,262 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bw6756 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" +) + +// hashToFp hashes msg to count prime field elements. +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 +func hashToFp(msg, dst []byte, count int) ([]fp.Element, error) { + + // 128 bits of security + // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 + L := 64 + + lenInBytes := count * L + pseudoRandomBytes, err := ecc.ExpandMsgXmd(msg, dst, lenInBytes) + if err != nil { + return nil, err + } + + res := make([]fp.Element, count) + for i := 0; i < count; i++ { + res[i].SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) + } + return res, nil +} + +// returns false if u>-u when seen as a bigInt +func sign0(u fp.Element) bool { + var a, b big.Int + u.ToBigIntRegular(&a) + u.Neg(&u) + u.ToBigIntRegular(&b) + return a.Cmp(&b) <= 0 +} + +// ---------------------------------------------------------------------------------------- +// G1Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG1(u fp.Element) G1Affine { + + var res G1Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fp.Element + z.SetOne() + c1.SetUint64(2) + c2.SetString("183162695478688143295363277863609973912688910644623094139398704891720872678025228163994673580388732642095427562821043430262853248964259401622004374680181856276883253377613672296702199391943428932630544113135668167634206718951424") + c3.SetString("44724918635541747613641388754122468373124590147190123301097028599548226344531294896393070481114654454458200984945494713184328466922237226071123682508438597467447523336749298101207214456752293854953034448110052217888788253840855") + 
c4.SetString("244216927304917524393817703818146631883585214192830792185864939855627830237366970885326231440518310189460570083761391240350470998619012535496005832906909141702511004503484896395602932522591238576840725484180890890178942291935230") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y fp.Element + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + tv4.Mul(&tv4, &c3) + x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bCurveCoeff) + E2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if E2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u) && sign0(y) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG1Svdw(t fp.Element) G1Affine { + res := svdwMapG1(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + t, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + res = MapToCurveG1Svdw(t[0]) + return res, nil +} + +// HashToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de 
Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + u, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + Q0 := MapToCurveG1Svdw(u[0]) + Q1 := MapToCurveG1Svdw(u[1]) + var _Q0, _Q1, _res G1Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} + +// ---------------------------------------------------------------------------------------- +// G2Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG2(u fp.Element) G2Affine { + + var res G2Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fp.Element + z.SetOne() + c1.SetString("34") + c2.SetString("183162695478688143295363277863609973912688910644623094139398704891720872678025228163994673580388732642095427562821043430262853248964259401622004374680181856276883253377613672296702199391943428932630544113135668167634206718951424") + c3.SetString("146433077860977604767773731846846066128329453951205949309133381060660557769470436688115507478076238051302460346380778731189432948203027606405253627801253330472118089084289142874549020683770046250396163515340816807916474328196956") + c4.SetString("122108463652458762196908851909073315941792607096415396092932469927813915118683485442663115720259155094730285041880695620175235499309506267748002916453454570851255502251742448197801466261295619288420362742090445445089471145967571") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y fp.Element + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + tv4.Mul(&tv4, &c3) + 
x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bTwistCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bTwistCurveCoeff) + E2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if E2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bTwistCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u) && sign0(y) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG2Svdw(t fp.Element) G2Affine { + res := svdwMapG2(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + t, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + res = MapToCurveG2Svdw(t[0]) + return res, nil +} + +// HashToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + u, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + Q0 := MapToCurveG2Svdw(u[0]) + Q1 := MapToCurveG2Svdw(u[1]) + var _Q0, _Q1, _res G2Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} diff --git a/ecc/bw6-756/internal/fptower/e3.go 
b/ecc/bw6-756/internal/fptower/e3.go new file mode 100644 index 000000000..81bf3b878 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e3.go @@ -0,0 +1,299 @@ +// Copyright 2020 ConsenSys AG +// +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fptower + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" +) + +// E3 is a degree-three finite field extension of fp3 +type E3 struct { + A0, A1, A2 fp.Element +} + +// Equal returns true if z equals x, fasle otherwise +// note this is more efficient than calling "z == x" +func (z *E3) Equal(x *E3) bool { + return z.A0.Equal(&x.A0) && z.A1.Equal(&x.A1) && z.A2.Equal(&x.A2) +} + +// SetString sets a E3 elmt from string +func (z *E3) SetString(s1, s2, s3 string) *E3 { + z.A0.SetString(s1) + z.A1.SetString(s2) + z.A2.SetString(s3) + return z +} + +// SetZero sets an E3 elmt to zero +func (z *E3) SetZero() *E3 { + *z = E3{} + return z +} + +// Clone returns a copy of self +func (z *E3) Clone() *E3 { + return &E3{ + A0: z.A0, + A1: z.A1, + A2: z.A2, + } +} + +// Set Sets a E3 elmt form another E3 elmt +func (z *E3) Set(x *E3) *E3 { + *z = *x + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E3) SetOne() *E3 { + z.A0.SetOne() + z.A1.SetZero() + z.A2.SetZero() + return z +} + +// SetRandom set z to a random elmt +func (z *E3) SetRandom() (*E3, error) { + if _, err := z.A0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.A1.SetRandom(); err != nil { + return nil, err + } + if _, err := z.A2.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// IsZero returns true if the two 
elements are equal, fasle otherwise +func (z *E3) IsZero() bool { + return z.A0.IsZero() && z.A1.IsZero() && z.A2.IsZero() +} + +// Neg negates the E3 number +func (z *E3) Neg(x *E3) *E3 { + z.A0.Neg(&x.A0) + z.A1.Neg(&x.A1) + z.A2.Neg(&x.A2) + return z +} + +// ToMont converts to Mont form +func (z *E3) ToMont() *E3 { + z.A0.ToMont() + z.A1.ToMont() + z.A2.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E3) FromMont() *E3 { + z.A0.FromMont() + z.A1.FromMont() + z.A2.FromMont() + return z +} + +// Add adds two elements of E3 +func (z *E3) Add(x, y *E3) *E3 { + z.A0.Add(&x.A0, &y.A0) + z.A1.Add(&x.A1, &y.A1) + z.A2.Add(&x.A2, &y.A2) + return z +} + +// Sub two elements of E3 +func (z *E3) Sub(x, y *E3) *E3 { + z.A0.Sub(&x.A0, &y.A0) + z.A1.Sub(&x.A1, &y.A1) + z.A2.Sub(&x.A2, &y.A2) + return z +} + +// Double doubles an element in E3 +func (z *E3) Double(x *E3) *E3 { + z.A0.Double(&x.A0) + z.A1.Double(&x.A1) + z.A2.Double(&x.A2) + return z +} + +// String puts E3 elmt in string form +func (z *E3) String() string { + return (z.A0.String() + "+(" + z.A1.String() + ")*u+(" + z.A2.String() + ")*u**2") +} + +// Conjugate conjugates an element in E3 +func (z *E3) Conjugate(x *E3) *E3 { + *z = *x + z.A1.Neg(&z.A1) + return z +} + +// MulByElement multiplies an element in E3 by an element in fp +func (z *E3) MulByElement(x *E3, y *fp.Element) *E3 { + _y := *y + z.A0.Mul(&x.A0, &_y) + z.A1.Mul(&x.A1, &_y) + z.A2.Mul(&x.A2, &_y) + return z +} + +// MulBy01 multiplication by sparse element (c0,c1,0) +func (z *E3) MulBy01(c0, c1 *fp.Element) *E3 { + + var a, b, tmp, t0, t1, t2 fp.Element + + a.Mul(&z.A0, c0) + b.Mul(&z.A1, c1) + + tmp.Add(&z.A1, &z.A2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + t0.Add(&t0, &a) + + tmp.Add(&z.A0, &z.A2) + t2.Mul(c0, &tmp) + t2.Sub(&t2, &a) + t2.Add(&t2, &b) + + t1.Add(c0, c1) + tmp.Add(&z.A0, &z.A1) + t1.Mul(&t1, &tmp) + t1.Sub(&t1, &a) + t1.Sub(&t1, &b) + + z.A0.Set(&t0) + z.A1.Set(&t1) + 
z.A2.Set(&t2) + + return z +} + +// MulBy1 multiplication of E6 by sparse element (0, c1, 0) +func (z *E3) MulBy1(c1 *fp.Element) *E3 { + + var b, tmp, t0, t1 fp.Element + b.Mul(&z.A1, c1) + + tmp.Add(&z.A1, &z.A2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + + tmp.Add(&z.A0, &z.A1) + t1.Mul(c1, &tmp) + t1.Sub(&t1, &b) + + z.A0.Set(&t0) + z.A1.Set(&t1) + z.A2.Set(&b) + + return z +} + +// Mul sets z to the E3-product of x,y, returns z +func (z *E3) Mul(x, y *E3) *E3 { + // Algorithm 13 from https://eprint.iacr.org/2010/354.pdf + var t0, t1, t2, c0, c1, c2, tmp fp.Element + t0.Mul(&x.A0, &y.A0) + t1.Mul(&x.A1, &y.A1) + t2.Mul(&x.A2, &y.A2) + + c0.Add(&x.A1, &x.A2) + tmp.Add(&y.A1, &y.A2) + c0.Mul(&c0, &tmp).Sub(&c0, &t1).Sub(&c0, &t2).MulByNonResidue(&c0) + + tmp.Add(&x.A0, &x.A2) + c2.Add(&y.A0, &y.A2).Mul(&c2, &tmp).Sub(&c2, &t0).Sub(&c2, &t2) + + c1.Add(&x.A0, &x.A1) + tmp.Add(&y.A0, &y.A1) + c1.Mul(&c1, &tmp).Sub(&c1, &t0).Sub(&c1, &t1) + t2.MulByNonResidue(&t2) + + z.A0.Add(&c0, &t0) + z.A1.Add(&c1, &t2) + z.A2.Add(&c2, &t1) + + return z +} + +// MulAssign sets z to the E3-product of z,y, returns z +func (z *E3) MulAssign(x *E3) *E3 { + return z.Mul(z, x) +} + +// Square sets z to the E3-product of x,x, returns z +func (z *E3) Square(x *E3) *E3 { + + // Algorithm 16 from https://eprint.iacr.org/2010/354.pdf + var c4, c5, c1, c2, c3, c0, c6 fp.Element + + c6.Double(&x.A1) + c4.Mul(&x.A0, &c6) // x.A0 * xA1 * 2 + c5.Square(&x.A2) + c1.MulByNonResidue(&c5).Add(&c1, &c4) + c2.Sub(&c4, &c5) + + c3.Square(&x.A0) + c4.Sub(&x.A0, &x.A1).Add(&c4, &x.A2) + c5.Mul(&c6, &x.A2) // x.A1 * xA2 * 2 + c4.Square(&c4) + c0.MulByNonResidue(&c5) + c4.Add(&c4, &c5).Sub(&c4, &c3) + + z.A0.Add(&c0, &c3) + z.A1 = c1 + z.A2.Add(&c2, &c4) + + return z +} + +// MulByNonResidue mul x by (0,1,0) +func (z *E3) MulByNonResidue(x *E3) *E3 { + z.A2, z.A1, z.A0 = x.A1, x.A0, x.A2 + z.A0.MulByNonResidue(&z.A0) + return z +} + +// Inverse an element in E3 +func (z *E3) 
Inverse(x *E3) *E3 { + // Algorithm 17 from https://eprint.iacr.org/2010/354.pdf + // step 9 is wrong in the paper it's t1-t4 + var t0, t1, t2, t3, t4, t5, t6, c0, c1, c2, d1, d2 fp.Element + t0.Square(&x.A0) + t1.Square(&x.A1) + t2.Square(&x.A2) + t3.Mul(&x.A0, &x.A1) + t4.Mul(&x.A0, &x.A2) + t5.Mul(&x.A1, &x.A2) + c0.MulByNonResidue(&t5).Neg(&c0).Add(&c0, &t0) + c1.MulByNonResidue(&t2).Sub(&c1, &t3) + c2.Sub(&t1, &t4) + t6.Mul(&x.A0, &c0) + d1.Mul(&x.A2, &c1) + d2.Mul(&x.A1, &c2) + d1.Add(&d1, &d2).MulByNonResidue(&d1) + t6.Add(&t6, &d1) + t6.Inverse(&t6) + z.A0.Mul(&c0, &t6) + z.A1.Mul(&c1, &t6) + z.A2.Mul(&c2, &t6) + + return z +} diff --git a/ecc/bw6-756/internal/fptower/e3_test.go b/ecc/bw6-756/internal/fptower/e3_test.go new file mode 100644 index 000000000..87e783576 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e3_test.go @@ -0,0 +1,330 @@ +package fptower + +import ( + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE3ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE3() + genB := GenE3() + genfp := GenFp() + + properties.Property("[BW756] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW756] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW756] Having the receiver as operand (mul) should 
output the same result", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW756] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (neg) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Neg(a) + a.Neg(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (mul by non residue) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.MulByNonResidue(a) + a.MulByNonResidue(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (mul by element) should output the same result", prop.ForAll( + func(a *E3, b fp.Element) bool { + var c E3 + c.MulByElement(a, &b) + a.MulByElement(a, &b) + return a.Equal(&c) + }, + genA, + genfp, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE3Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + 
properties := gopter.NewProperties(parameters) + + genA := GenE3() + genB := GenE3() + genfp := GenFp() + + properties.Property("[BW756] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E3) bool { + var c E3 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW756] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW756] inverse twice should leave an element invariant", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] neg twice should leave an element invariant", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Neg(a).Neg(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] square and mul should output the same result", prop.ForAll( + func(a *E3) bool { + var b, c E3 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BW756] MulByElement MulByElement inverse should leave an element invariant", prop.ForAll( + func(a *E3, b fp.Element) bool { + var c E3 + var d fp.Element + d.Inverse(&b) + c.MulByElement(a, &b).MulByElement(&c, &d) + return c.Equal(a) + }, + genA, + genfp, + )) + + properties.Property("[BW756] Double and mul by 2 should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + var c fp.Element + c.SetUint64(2) + b.Double(a) + a.MulByElement(a, &c) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Mulbynonres should be the same as multiplying by (0,1)", prop.ForAll( + func(a *E3) bool { + var b, c, d E3 + b.A1.SetOne() + c.MulByNonResidue(a) + d.Mul(a, &b) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW756] a + pi(a), a-pi(a) should be real", prop.ForAll( + func(a *E3) bool { + var b, c, 
d E3 + var e, f fp.Element + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.A0) + f.Double(&a.A1) + return c.A1.IsZero() && d.A0.IsZero() && e.Equal(&c.A0) && f.Equal(&d.A1) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE3Add(b *testing.B) { + var a, c E3 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE3Sub(b *testing.B) { + var a, c E3 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE3Mul(b *testing.B) { + var a, c E3 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE3MulByElement(b *testing.B) { + var a E3 + var c fp.Element + c.SetRandom() + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByElement(&a, &c) + } +} + +func BenchmarkE3Square(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE3Inverse(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } +} + +func BenchmarkE3MulNonRes(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByNonResidue(&a) + } +} + +func BenchmarkE3Conjugate(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Conjugate(&a) + } +} diff --git a/ecc/bw6-756/internal/fptower/e6.go b/ecc/bw6-756/internal/fptower/e6.go new file mode 100644 index 000000000..7a794fb0c --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e6.go @@ -0,0 +1,412 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "errors" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// E6 is a degree two finite field extension of fp3 +type E6 struct { + B0, B1 E3 +} + +// Equal returns true if z equals x, fasle otherwise +func (z *E6) Equal(x *E6) bool { + return z.B0.Equal(&x.B0) && z.B1.Equal(&x.B1) +} + +// String puts E6 in string form +func (z *E6) String() string { + return (z.B0.String() + "+(" + z.B1.String() + ")*v") +} + +// SetString sets a E6 from string +func (z *E6) SetString(s0, s1, s2, s3, s4, s5 string) *E6 { + z.B0.SetString(s0, s1, s2) + z.B1.SetString(s3, s4, s5) + return z +} + +// Set copies x into z and returns z +func (z *E6) Set(x *E6) *E6 { + *z = *x + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E6) SetOne() *E6 { + *z = E6{} + z.B0.A0.SetOne() + return z +} + +// ToMont converts to Mont form +func (z *E6) ToMont() *E6 { + z.B0.ToMont() + z.B1.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E6) FromMont() *E6 { + z.B0.FromMont() + z.B1.FromMont() + return z +} + +// Add set z=x+y in E6 and return z +func (z *E6) Add(x, y *E6) *E6 { + z.B0.Add(&x.B0, &y.B0) + z.B1.Add(&x.B1, &y.B1) + return z +} + +// Sub sets z to x sub y and return z +func (z *E6) Sub(x, y *E6) *E6 { + z.B0.Sub(&x.B0, &y.B0) + z.B1.Sub(&x.B1, &y.B1) + return z +} + +// Double sets z=2*x and returns z +func (z *E6) Double(x *E6) *E6 { + 
z.B0.Double(&x.B0) + z.B1.Double(&x.B1) + return z +} + +// SetRandom used only in tests +func (z *E6) SetRandom() (*E6, error) { + if _, err := z.B0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.B1.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// Mul set z=x*y in E6 and return z +func (z *E6) Mul(x, y *E6) *E6 { + var a, b, c E3 + a.Add(&x.B0, &x.B1) + b.Add(&y.B0, &y.B1) + a.Mul(&a, &b) + b.Mul(&x.B0, &y.B0) + c.Mul(&x.B1, &y.B1) + z.B1.Sub(&a, &b).Sub(&z.B1, &c) + z.B0.MulByNonResidue(&c).Add(&z.B0, &b) + return z +} + +// Square set z=x*x in E6 and return z +func (z *E6) Square(x *E6) *E6 { + + //Algorithm 22 from https://eprint.iacr.org/2010/354.pdf + var c0, c2, c3 E3 + c0.Sub(&x.B0, &x.B1) + c3.MulByNonResidue(&x.B1).Neg(&c3).Add(&x.B0, &c3) + c2.Mul(&x.B0, &x.B1) + c0.Mul(&c0, &c3).Add(&c0, &c2) + z.B1.Double(&c2) + c2.MulByNonResidue(&c2) + z.B0.Add(&c0, &c2) + + return z +} + +// Karabina's compressed cyclotomic square +// https://eprint.iacr.org/2010/542.pdf +// Th. 3.2 with minor modifications to fit our tower +func (z *E6) CyclotomicSquareCompressed(x *E6) *E6 { + + var t [7]fp.Element + + // t0 = g1^2 + t[0].Square(&x.B0.A1) + // t1 = g5^2 + t[1].Square(&x.B1.A2) + // t5 = g1 + g5 + t[5].Add(&x.B0.A1, &x.B1.A2) + // t2 = (g1 + g5)^2 + t[2].Square(&t[5]) + + // t3 = g1^2 + g5^2 + t[3].Add(&t[0], &t[1]) + // t5 = 2 * g1 * g5 + t[5].Sub(&t[2], &t[3]) + + // t6 = g3 + g2 + t[6].Add(&x.B1.A0, &x.B0.A2) + // t3 = (g3 + g2)^2 + t[3].Square(&t[6]) + // t2 = g3^2 + t[2].Square(&x.B1.A0) + + // t6 = 2 * nr * g1 * g5 + t[6].MulByNonResidue(&t[5]) + // t5 = 4 * nr * g1 * g5 + 2 * g3 + t[5].Add(&t[6], &x.B1.A0). 
+ Double(&t[5]) + // z3 = 6 * nr * g1 * g5 + 2 * g3 + z.B1.A0.Add(&t[5], &t[6]) + + // t4 = nr * g5^2 + t[4].MulByNonResidue(&t[1]) + // t5 = nr * g5^2 + g1^2 + t[5].Add(&t[0], &t[4]) + // t6 = nr * g5^2 + g1^2 - g2 + t[6].Sub(&t[5], &x.B0.A2) + + // t1 = g2^2 + t[1].Square(&x.B0.A2) + + // t6 = 2 * nr * g5^2 + 2 * g1^2 - 2*g2 + t[6].Double(&t[6]) + // z2 = 3 * nr * g5^2 + 3 * g1^2 - 2*g2 + z.B0.A2.Add(&t[6], &t[5]) + + // t4 = nr * g2^2 + t[4].MulByNonResidue(&t[1]) + // t5 = g3^2 + nr * g2^2 + t[5].Add(&t[2], &t[4]) + // t6 = g3^2 + nr * g2^2 - g1 + t[6].Sub(&t[5], &x.B0.A1) + // t6 = 2 * g3^2 + 2 * nr * g2^2 - 2 * g1 + t[6].Double(&t[6]) + // z1 = 3 * g3^2 + 3 * nr * g2^2 - 2 * g1 + z.B0.A1.Add(&t[6], &t[5]) + + // t0 = g2^2 + g3^2 + t[0].Add(&t[2], &t[1]) + // t5 = 2 * g3 * g2 + t[5].Sub(&t[3], &t[0]) + // t6 = 2 * g3 * g2 + g5 + t[6].Add(&t[5], &x.B1.A2) + // t6 = 4 * g3 * g2 + 2 * g5 + t[6].Double(&t[6]) + // z5 = 6 * g3 * g2 + 2 * g5 + z.B1.A2.Add(&t[5], &t[6]) + + return z +} + +// Decompress Karabina's cyclotomic square result +func (z *E6) Decompress(x *E6) *E6 { + + var t [3]fp.Element + var one fp.Element + one.SetOne() + + // t0 = g1^2 + t[0].Square(&x.B0.A1) + // t1 = 3 * g1^2 - 2 * g2 + t[1].Sub(&t[0], &x.B0.A2). + Double(&t[1]). + Add(&t[1], &t[0]) + // t0 = E * g5^2 + t1 + t[2].Square(&x.B1.A2) + t[0].MulByNonResidue(&t[2]). + Add(&t[0], &t[1]) + // t1 = 1/(4 * g3) + t[1].Double(&x.B1.A0). + Double(&t[1]). + Inverse(&t[1]) // costly + // z4 = g4 + z.B1.A1.Mul(&t[0], &t[1]) + + // t1 = g2 * g1 + t[1].Mul(&x.B0.A2, &x.B0.A1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t[2].Square(&x.B1.A1). + Sub(&t[2], &t[1]). + Double(&t[2]). + Sub(&t[2], &t[1]) + // t1 = g3 * g5 + t[1].Mul(&x.B1.A0, &x.B1.A2) + // c_0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t[2].Add(&t[2], &t[1]) + z.B0.A0.MulByNonResidue(&t[2]). 
+ Add(&z.B0.A0, &one) + + z.B0.A1.Set(&x.B0.A1) + z.B0.A2.Set(&x.B0.A2) + z.B1.A0.Set(&x.B1.A0) + z.B1.A2.Set(&x.B1.A2) + + return z +} + +// Granger-Scott's cyclotomic square +// https://eprint.iacr.org/2009/565.pdf, 3.2 +func (z *E6) CyclotomicSquare(x *E6) *E6 { + // x=(x0,x1,x2,x3,x4,x5,x6,x7) in E3^6 + // cyclosquare(x)=(3*x4^2*u + 3*x0^2 - 2*x0, + // 3*x2^2*u + 3*x3^2 - 2*x1, + // 3*x5^2*u + 3*x1^2 - 2*x2, + // 6*x1*x5*u + 2*x3, + // 6*x0*x4 + 2*x4, + // 6*x2*x3 + 2*x5) + + var t [9]fp.Element + + t[0].Square(&x.B1.A1) + t[1].Square(&x.B0.A0) + t[6].Add(&x.B1.A1, &x.B0.A0).Square(&t[6]).Sub(&t[6], &t[0]).Sub(&t[6], &t[1]) // 2*x4*x0 + t[2].Square(&x.B0.A2) + t[3].Square(&x.B1.A0) + t[7].Add(&x.B0.A2, &x.B1.A0).Square(&t[7]).Sub(&t[7], &t[2]).Sub(&t[7], &t[3]) // 2*x2*x3 + t[4].Square(&x.B1.A2) + t[5].Square(&x.B0.A1) + t[8].Add(&x.B1.A2, &x.B0.A1).Square(&t[8]).Sub(&t[8], &t[4]).Sub(&t[8], &t[5]).MulByNonResidue(&t[8]) // 2*x5*x1*u + + t[0].MulByNonResidue(&t[0]).Add(&t[0], &t[1]) // x4^2*u + x0^2 + t[2].MulByNonResidue(&t[2]).Add(&t[2], &t[3]) // x2^2*u + x3^2 + t[4].MulByNonResidue(&t[4]).Add(&t[4], &t[5]) // x5^2*u + x1^2 + + z.B0.A0.Sub(&t[0], &x.B0.A0).Double(&z.B0.A0).Add(&z.B0.A0, &t[0]) + z.B0.A1.Sub(&t[2], &x.B0.A1).Double(&z.B0.A1).Add(&z.B0.A1, &t[2]) + z.B0.A2.Sub(&t[4], &x.B0.A2).Double(&z.B0.A2).Add(&z.B0.A2, &t[4]) + + z.B1.A0.Add(&t[8], &x.B1.A0).Double(&z.B1.A0).Add(&z.B1.A0, &t[8]) + z.B1.A1.Add(&t[6], &x.B1.A1).Double(&z.B1.A1).Add(&z.B1.A1, &t[6]) + z.B1.A2.Add(&t[7], &x.B1.A2).Double(&z.B1.A2).Add(&z.B1.A2, &t[7]) + + return z +} + +// Inverse set z to the inverse of x in E6 and return z +func (z *E6) Inverse(x *E6) *E6 { + // Algorithm 23 from https://eprint.iacr.org/2010/354.pdf + + var t0, t1, tmp E3 + t0.Square(&x.B0) + t1.Square(&x.B1) + tmp.MulByNonResidue(&t1) + t0.Sub(&t0, &tmp) + t1.Inverse(&t0) + z.B0.Mul(&x.B0, &t1) + z.B1.Mul(&x.B1, &t1).Neg(&z.B1) + + return z +} + +// Exp sets z=x**e and returns it +func (z *E6) Exp(x *E6, e 
big.Int) *E6 { + var res E6 + res.SetOne() + b := e.Bytes() + for i := range b { + w := b[i] + mask := byte(0x80) + for j := 7; j >= 0; j-- { + res.Square(&res) + if (w&mask)>>j != 0 { + res.Mul(&res, x) + } + mask = mask >> 1 + } + } + z.Set(&res) + return z +} + +// InverseUnitary inverse a unitary element +func (z *E6) InverseUnitary(x *E6) *E6 { + return z.Conjugate(x) +} + +// Conjugate set z to x conjugated and return z +func (z *E6) Conjugate(x *E6) *E6 { + *z = *x + z.B1.Neg(&z.B1) + return z +} + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = fp.Bytes * 6 + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... +func (z *E6) Bytes() (r [SizeOfGT]byte) { + + offset := 0 + var buf [fp.Bytes]byte + + buf = z.B1.A2.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B1.A1.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B1.A0.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B0.A2.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B0.A1.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B0.A0.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + + return +} + +// SetBytes interprets e as the bytes of a big-endian GT +// sets z to that value (in Montgomery form), and returns z. +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... 
+func (z *E6) SetBytes(e []byte) error { + if len(e) != SizeOfGT { + return errors.New("invalid buffer size") + } + offset := 0 + z.B1.A2.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B1.A1.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B1.A0.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B0.A2.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B0.A1.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B0.A0.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + + return nil +} + +// IsInSubGroup ensures GT/E6 is in correct sugroup +func (z *E6) IsInSubGroup() bool { + var one, _z E6 + one.SetOne() + _z.Exp(z, *fr.Modulus()) + return _z.Equal(&one) +} diff --git a/ecc/bw6-756/internal/fptower/e6_pairing.go b/ecc/bw6-756/internal/fptower/e6_pairing.go new file mode 100644 index 000000000..c177bfaa7 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e6_pairing.go @@ -0,0 +1,127 @@ +package fptower + +import "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + +func (z *E6) nSquare(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquare(z) + } +} + +func (z *E6) nSquareCompressed(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquareCompressed(z) + } +} + +// Expt set z to x^t in E6 and return z +func (z *E6) Expt(x *E6) *E6 { + + // Expt computation is derived from the addition chain: + // + // _1000 = 1 << 3 + // _1001 = 1 + _1000 + // _1001000 = _1001 << 3 + // _1010001 = _1001 + _1001000 + // _10011001 = _1001000 + _1010001 + // i67 = ((_10011001 << 5 + _1001) << 10 + _1010001) << 41 + // return 1 + i67 + // + // Operations: 62 squares 6 multiplies + // + // Generated by github.com/mmcloughlin/addchain v0.4.0. + + // Allocate Temporaries. 
+ var result, t0, t1 E6 + + // Step 3: result = x^0x8 + result.CyclotomicSquare(x) + result.nSquare(2) + + // Step 4: t0 = x^0x9 + t0.Mul(x, &result) + + // Step 7: t1 = x^0x48 + t1.CyclotomicSquare(&t0) + t1.nSquare(2) + + // Step 8: result = x^0x51 + result.Mul(&t0, &t1) + + // Step 9: t1 = x^0x99 + t1.Mul(&t1, &result) + + // Step 14: t1 = x^0x1320 + t1.nSquare(5) + + // Step 15: t0 = x^0x1329 + t0.Mul(&t0, &t1) + + // Step 25: t0 = x^0x4ca400 + t0.nSquare(10) + + // Step 26: result = x^0x4ca451 + result.Mul(&result, &t0) + + // Step 67: result = x^0x9948a20000000000 + result.nSquareCompressed(41) + result.Decompress(&result) + + // Step 68: result = x^0x9948a20000000001 + z.Mul(x, &result) + + return z +} + +// MulBy034 multiplication by sparse element (c0,0,0,c3,c4,0) +func (z *E6) MulBy034(c0, c3, c4 *fp.Element) *E6 { + + var a, b, d E3 + + a.MulByElement(&z.B0, c0) + + b.Set(&z.B1) + b.MulBy01(c3, c4) + + c0.Add(c0, c3) + d.Add(&z.B0, &z.B1) + d.MulBy01(c0, c4) + + z.B1.Add(&a, &b).Neg(&z.B1).Add(&z.B1, &d) + z.B0.MulByNonResidue(&b).Add(&z.B0, &a) + + return z +} + +// Mul034By034 multiplication of sparse element (c0,0,0,c3,c4,0) by sparse element (d0,0,0,d3,d4,0) +func (z *E6) Mul034By034(d0, d3, d4, c0, c3, c4 *fp.Element) *E6 { + var tmp, x0, x3, x4, x04, x03, x34 fp.Element + x0.Mul(c0, d0) + x3.Mul(c3, d3) + x4.Mul(c4, d4) + tmp.Add(c0, c4) + x04.Add(d0, d4). + Mul(&x04, &tmp). + Sub(&x04, &x0). + Sub(&x04, &x4) + tmp.Add(c0, c3) + x03.Add(d0, d3). + Mul(&x03, &tmp). + Sub(&x03, &x0). + Sub(&x03, &x3) + tmp.Add(c3, c4) + x34.Add(d3, d4). + Mul(&x34, &tmp). + Sub(&x34, &x3). + Sub(&x34, &x4) + + z.B0.A0.MulByNonResidue(&x4). 
+ Add(&z.B0.A0, &x0) + z.B0.A1.Set(&x3) + z.B0.A2.Set(&x34) + z.B1.A0.Set(&x03) + z.B1.A1.Set(&x04) + z.B1.A2.SetZero() + + return z +} diff --git a/ecc/bw6-756/internal/fptower/e6_test.go b/ecc/bw6-756/internal/fptower/e6_test.go new file mode 100644 index 000000000..078d5c7b1 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e6_test.go @@ -0,0 +1,387 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE6Serialization(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + + properties.Property("[BW6-756] SetBytes(Bytes()) should stay constant", prop.ForAll( + func(a *E6) bool { + var b E6 + buf := a.Bytes() + if err := b.SetBytes(buf[:]); err != nil { + return false + } + return a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE6ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := 
GenE6() + + properties.Property("[BW6-756] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Cyclotomic square) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.CyclotomicSquare(a) + a.CyclotomicSquare(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + 
genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Frobenius) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Frobenius(a) + a.Frobenius(a) + return a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE6Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + + properties.Property("[BW6-756] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c E6 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] inverse twice should leave an element invariant", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] square and mul should output the same result", prop.ForAll( + func(a *E6) bool { + var b, c E6 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BW6-756] a + pi(a), a-pi(a) should be real", prop.ForAll( + func(a *E6) bool { + var b, c, d E6 + var e, f, g E3 + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.B0) + f.Double(&a.B1) + return c.B1.Equal(&g) && d.B0.Equal(&g) && e.Equal(&c.B0) && f.Equal(&d.B1) + }, + genA, + )) + + properties.Property("[BW6-756] pi**12=id", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Frobenius(a). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). 
+ Frobenius(&b) + return b.Equal(a) + }, + genA, + )) + + properties.Property("[BW6-756] cyclotomic square (Granger-Scott) and square should be the same in the cyclotomic subgroup", prop.ForAll( + func(a *E6) bool { + var b, c, d E6 + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.Frobenius(&b).Mul(a, &b) + c.Square(a) + d.CyclotomicSquare(a) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW6-756] compressed cyclotomic square (Karabina) and square should be the same in the cyclotomic subgroup", prop.ForAll( + func(a *E6) bool { + var b, c, d E6 + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.Frobenius(&b).Mul(a, &b) + c.Square(a) + d.CyclotomicSquareCompressed(a).Decompress(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW6-756] Frobenius of x in E6 should be equal to x^q", prop.ForAll( + func(a *E6) bool { + var b, c E6 + q := fp.Modulus() + b.Frobenius(a) + c.Exp(a, *q) + return c.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE6Add(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE6Sub(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE6Mul(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE6Cyclosquare(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.CyclotomicSquare(&a) + } +} + +func BenchmarkE6Square(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE6Inverse(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } 
+} + +func BenchmarkE6Conjugate(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Conjugate(&a) + } +} + +func BenchmarkE6Frobenius(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Frobenius(&a) + } +} + +func BenchmarkE6Expt(b *testing.B) { + var a, c E6 + a.SetRandom() + b.ResetTimer() + c.Conjugate(&a) + a.Inverse(&a) + c.Mul(&c, &a) + + a.Frobenius(&c). + Mul(&a, &c) + + for i := 0; i < b.N; i++ { + a.Expt(&a) + } +} diff --git a/ecc/bw6-756/internal/fptower/frobenius.go b/ecc/bw6-756/internal/fptower/frobenius.go new file mode 100644 index 000000000..73a7602c6 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/frobenius.go @@ -0,0 +1,102 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fptower + +import "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + +var _frobA = fp.Element{ + 4513305906938863657, + 16223881110415437916, + 2594807996890465129, + 12027263585750947831, + 4394688080420790544, + 16545365607090591069, + 17206939158340345469, + 16693218895653628888, + 12341936222077983834, + 15961798706098381578, + 6325965824540199947, + 854909948470066, +} +var _frobB = fp.Element{ + 13933438166770692198, + 9936849508207988643, + 15731274946730933551, + 17453539207763286666, + 9211229669332609391, + 16304457798847396452, + 9530634072302290725, + 16589137634438497937, + 3757329544587311773, + 6048657743386074056, + 539268601340212626, + 3128351770947469, +} +var _frobC = fp.Element{ + 4513305906938859419, + 12241098542315434076, + 17754824365858099600, + 5821813791745674579, + 7115107423905013045, + 2898523548767316962, + 7403683460125356932, + 16613279480632639560, + 14397298621774850312, + 623298467364696769, + 15794181680107729725, + 1224261424482813, +} +var _frobAC = fp.Element{ + 4239, + 7713986544913874944, + 18326082943621398681, + 11034058719804682881, + 13605917749753399936, + 14403079332228435905, + 8290829156933084579, + 14835612456382575210, + 16099265766665295608, + 3563712375774904018, + 6865234425880412574, + 3983261719417535, +} +var _frobBC = fp.Element{ + 13933438166770687960, + 5954066940107984803, + 12444547241989016406, + 11248089413758013415, + 11931649012816831892, + 2657615740524122345, + 18174122447796853804, + 16509198219417508608, + 5812691944284178251, + 9156901578361940863, + 10007484456907742403, + 3497703246960216, +} + +// Frobenius set z in E6 to Frobenius(x), return z +func (z *E6) Frobenius(x *E6) *E6 { + + z.B0.A0 = x.B0.A0 + z.B0.A1.Mul(&x.B0.A1, &_frobA) + z.B0.A2.Mul(&x.B0.A2, &_frobB) + + z.B1.A0.Mul(&x.B1.A0, &_frobC) + z.B1.A1.Mul(&x.B1.A1, &_frobAC) + z.B1.A2.Mul(&x.B1.A2, &_frobBC) + + return z +} diff --git a/ecc/bw6-756/internal/fptower/generators_test.go 
b/ecc/bw6-756/internal/fptower/generators_test.go new file mode 100644 index 000000000..2c948398f --- /dev/null +++ b/ecc/bw6-756/internal/fptower/generators_test.go @@ -0,0 +1,43 @@ +package fptower + +import ( + "crypto/rand" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/leanovate/gopter" +) + +// TODO all gopter.Gen are incorrect, use same model as goff + +// GenFp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + rand.Read(b[:]) + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenE3 generates an E3 elmt +func GenE3() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + GenFp(), + ).Map(func(values []interface{}) *E3 { + return &E3{A0: values[0].(fp.Element), A1: values[1].(fp.Element), A2: values[2].(fp.Element)} + }) +} + +// E6 generates an E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE3(), + GenE3(), + ).Map(func(values []interface{}) *E6 { + return &E6{B0: *values[0].(*E3), B1: *values[1].(*E3)} + }) +} diff --git a/ecc/bw6-756/marshal.go b/ecc/bw6-756/marshal.go new file mode 100644 index 000000000..9d67b4e82 --- /dev/null +++ b/ecc/bw6-756/marshal.go @@ -0,0 +1,1155 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "encoding/binary" + "errors" + "io" + "reflect" + "sync/atomic" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/internal/fptower" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// To encode G1Affine and G2Affine points, we mask the most significant bits with these bits to specify without ambiguity +// metadata needed for point (de)compression +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. 
+const ( + mMask byte = 0b111 << 5 + mUncompressed byte = 0b000 << 5 + mUncompressedInfinity byte = 0b010 << 5 + mCompressedSmallest byte = 0b100 << 5 + mCompressedLargest byte = 0b101 << 5 + mCompressedInfinity byte = 0b110 << 5 +) + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = fptower.SizeOfGT + +// Encoder writes bw6-756 object values to an output stream +type Encoder struct { + w io.Writer + n int64 // written bytes + raw bool // raw vs compressed encoding +} + +// Decoder reads bw6-756 object values from an inbound stream +type Decoder struct { + r io.Reader + n int64 // read bytes + subGroupCheck bool // default to true +} + +// NewDecoder returns a binary decoder supporting curve bw6-756 objects in both +// compressed and uncompressed (raw) forms +func NewDecoder(r io.Reader, options ...func(*Decoder)) *Decoder { + d := &Decoder{r: r, subGroupCheck: true} + + for _, o := range options { + o(d) + } + + return d +} + +// Decode reads the binary encoding of v from the stream +// type must be *uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, *[]G1Affine or *[]G2Affine +func (dec *Decoder) Decode(v interface{}) (err error) { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() || !rv.Elem().CanSet() { + return errors.New("bw6-756 decoder: unsupported type, need pointer") + } + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // in particular, careful attention must be given to usage of Bytes() method on Elements and Points + // that return an array (not a slice) of bytes. Using this is beneficial to minimize memallocs + // in very large (de)serialization upstream in gnark. 
// (but detrimental to code readability here)
+ read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *[]G1Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G1Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + case *[]G2Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G2Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New("bw6-756 encoder: unsupported type") + } + err = binary.Read(dec.r, binary.BigEndian, t) + if err == nil { + dec.n += int64(n) + } + return + } +} + +// BytesRead return total bytes read from reader +func (dec *Decoder) BytesRead() int64 { + return dec.n +} + +func (dec *Decoder) readUint32() (r uint32, err error) { + var read int + var buf [4]byte + read, err = io.ReadFull(dec.r, buf[:4]) + dec.n += int64(read) + if err != nil { + return + } + r = binary.BigEndian.Uint32(buf[:4]) + return +} + +func isCompressed(msb byte) bool { + mData := msb & mMask + return !((mData == mUncompressed) || (mData == mUncompressedInfinity)) +} + +// NewEncoder returns a binary encoder supporting curve bw6-756 objects +func NewEncoder(w io.Writer, options ...func(*Encoder)) *Encoder { + // default settings + enc := &Encoder{ + w: w, + n: 0, + raw: false, + } + + // handle options + for _, option := range options { + option(enc) + } + + return enc +} + +// Encode writes the binary encoding of v to the stream +// type must be uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, []G1Affine or []G2Affine +func (enc *Encoder) Encode(v interface{}) (err error) { + if enc.raw { + 
return enc.encodeRaw(v) + } + return enc.encode(v) +} + +// BytesWritten return total bytes written on writer +func (enc *Encoder) BytesWritten() int64 { + return enc.n +} + +// RawEncoding returns an option to use in NewEncoder(...) which sets raw encoding mode to true +// points will not be compressed using this option +func RawEncoding() func(*Encoder) { + return func(enc *Encoder) { + enc.raw = true + } +} + +// NoSubgroupChecks returns an option to use in NewDecoder(...) which disable subgroup checks on the points +// the decoder will read. Use with caution, as crafted points from an untrusted source can lead to crypto-attacks. +func NoSubgroupChecks() func(*Decoder) { + return func(dec *Decoder) { + dec.subGroupCheck = false + } +} + +func (enc *Encoder) encode(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < 
len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +func (enc *Encoder) encodeRaw(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = 
t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +// SizeOfG1AffineCompressed represents the size in bytes that a G1Affine need in binary form, compressed +const SizeOfG1AffineCompressed = 96 + +// SizeOfG1AffineUncompressed represents the size in bytes that a G1Affine need in binary form, uncompressed +const SizeOfG1AffineUncompressed = SizeOfG1AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G1Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (p *G1Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes 
returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G1Affine) Bytes() (res [SizeOfG1AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) 
+// see Bytes() for a compressed representation +func (p *G1Affine) RawBytes() (res [SizeOfG1AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + tmp = p.Y + tmp.FromMont() + binary.BigEndian.PutUint64(res[184:192], tmp[0]) + binary.BigEndian.PutUint64(res[176:184], tmp[1]) + binary.BigEndian.PutUint64(res[168:176], tmp[2]) + binary.BigEndian.PutUint64(res[160:168], tmp[3]) + binary.BigEndian.PutUint64(res[152:160], tmp[4]) + binary.BigEndian.PutUint64(res[144:152], tmp[5]) + binary.BigEndian.PutUint64(res[136:144], tmp[6]) + binary.BigEndian.PutUint64(res[128:136], tmp[7]) + binary.BigEndian.PutUint64(res[120:128], tmp[8]) + binary.BigEndian.PutUint64(res[112:120], tmp[9]) + binary.BigEndian.PutUint64(res[104:112], tmp[10]) + binary.BigEndian.PutUint64(res[96:104], tmp[11]) + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short io.ErrShortBuffer is returned +// if buf contains 
compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function retunrs an error +// this check if the resulting point is on the curve and in the correct subgroup +func (p *G1Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G1Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG1AffineCompressed { + return 0, io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG1AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineCompressed, nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + p.X.SetBytes(buf[:fp.Bytes]) + p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. 
we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G1Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is infinity and need 
no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G1Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + // store mData in p.Y[0] + p.Y[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} + +// SizeOfG2AffineCompressed represents the size in bytes that a G2Affine need in binary form, compressed +const SizeOfG2AffineCompressed = 96 + +// SizeOfG2AffineUncompressed represents the size in bytes that a G2Affine need in binary form, uncompressed +const SizeOfG2AffineUncompressed = SizeOfG2AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G2Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (p *G2Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. 
+// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G2Affine) Bytes() (res [SizeOfG2AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) +// see Bytes() for a compressed representation +func (p *G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + tmp = p.Y + tmp.FromMont() + binary.BigEndian.PutUint64(res[184:192], tmp[0]) + 
binary.BigEndian.PutUint64(res[176:184], tmp[1]) + binary.BigEndian.PutUint64(res[168:176], tmp[2]) + binary.BigEndian.PutUint64(res[160:168], tmp[3]) + binary.BigEndian.PutUint64(res[152:160], tmp[4]) + binary.BigEndian.PutUint64(res[144:152], tmp[5]) + binary.BigEndian.PutUint64(res[136:144], tmp[6]) + binary.BigEndian.PutUint64(res[128:136], tmp[7]) + binary.BigEndian.PutUint64(res[120:128], tmp[8]) + binary.BigEndian.PutUint64(res[112:120], tmp[9]) + binary.BigEndian.PutUint64(res[104:112], tmp[10]) + binary.BigEndian.PutUint64(res[96:104], tmp[11]) + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short io.ErrShortBuffer is returned +// if buf contains compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function retunrs an error +// this check if the resulting point is on the curve and in the correct subgroup +func (p *G2Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G2Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG2AffineCompressed { + return 0, 
io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG2AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineCompressed, nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + p.X.SetBytes(buf[:fp.Bytes]) + p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. 
we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G2Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is 
infinity and need no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G2Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + // store mData in p.Y[0] + p.Y[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} diff --git a/ecc/bw6-756/marshal_test.go b/ecc/bw6-756/marshal_test.go new file mode 100644 index 000000000..96540df8e --- /dev/null +++ b/ecc/bw6-756/marshal_test.go @@ -0,0 +1,457 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "bytes" + "io" + "math/big" + "math/rand" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/internal/fptower" +) + +func TestEncoder(t *testing.T) { + + // TODO need proper fuzz testing here + + var inA uint64 + var inB fr.Element + var inC fp.Element + var inD G1Affine + var inE G1Affine + var inF G2Affine + var inG []G1Affine + var inH []G2Affine + var inI []fp.Element + var inJ []fr.Element + + // set values of inputs + inA = rand.Uint64() + inB.SetRandom() + inC.SetRandom() + inD.ScalarMultiplication(&g1GenAff, new(big.Int).SetUint64(rand.Uint64())) + // inE --> infinity + inF.ScalarMultiplication(&g2GenAff, new(big.Int).SetUint64(rand.Uint64())) + inG = make([]G1Affine, 2) + inH = make([]G2Affine, 0) + inG[1] = inD + inI = make([]fp.Element, 3) + inI[2] = inD.X + inJ = make([]fr.Element, 0) + + // encode them, compressed and raw + var buf, bufRaw bytes.Buffer + enc := NewEncoder(&buf) + encRaw := NewEncoder(&bufRaw, RawEncoding()) + toEncode := []interface{}{inA, &inB, &inC, &inD, &inE, &inF, inG, inH, inI, inJ} + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + t.Fatal(err) + } + if err := encRaw.Encode(v); err != nil { + t.Fatal(err) + } + } + + testDecode := func(t *testing.T, r io.Reader, n int64) { + dec := NewDecoder(r) + var outA uint64 + var outB fr.Element + var outC fp.Element + var outD G1Affine + var outE G1Affine + outE.X.SetOne() + outE.Y.SetUint64(42) + var outF G2Affine + var outG []G1Affine + var outH []G2Affine + var outI []fp.Element + var outJ []fr.Element + + toDecode := []interface{}{&outA, &outB, &outC, &outD, &outE, &outF, &outG, &outH, &outI, &outJ} + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + t.Fatal(err) + } 
+ } + + // compare values + if inA != outA { + t.Fatal("didn't encode/decode uint64 value properly") + } + + if !inB.Equal(&outB) || !inC.Equal(&outC) { + t.Fatal("decode(encode(Element) failed") + } + if !inD.Equal(&outD) || !inE.Equal(&outE) { + t.Fatal("decode(encode(G1Affine) failed") + } + if !inF.Equal(&outF) { + t.Fatal("decode(encode(G2Affine) failed") + } + if (len(inG) != len(outG)) || (len(inH) != len(outH)) { + t.Fatal("decode(encode(slice(points))) failed") + } + for i := 0; i < len(inG); i++ { + if !inG[i].Equal(&outG[i]) { + t.Fatal("decode(encode(slice(points))) failed") + } + } + if (len(inI) != len(outI)) || (len(inJ) != len(outJ)) { + t.Fatal("decode(encode(slice(elements))) failed") + } + for i := 0; i < len(inI); i++ { + if !inI[i].Equal(&outI[i]) { + t.Fatal("decode(encode(slice(elements))) failed") + } + } + if n != dec.BytesRead() { + t.Fatal("bytes read don't match bytes written") + } + } + + // decode them + testDecode(t, &buf, enc.BytesWritten()) + testDecode(t, &bufRaw, encRaw.BytesWritten()) + +} + +func TestIsCompressed(t *testing.T) { + var g1Inf, g1 G1Affine + var g2Inf, g2 G2Affine + + g1 = g1GenAff + g2 = g2GenAff + + { + b := g1Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1Inf.Bytes() should be compressed") + } + } + + { + b := g1Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1Inf.RawBytes() should be uncompressed") + } + } + + { + b := g1.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1.Bytes() should be compressed") + } + } + + { + b := g1.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1.RawBytes() should be uncompressed") + } + } + + { + b := g2Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2Inf.Bytes() should be compressed") + } + } + + { + b := g2Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g2Inf.RawBytes() should be uncompressed") + } + } + + { + b := g2.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2.Bytes() should be compressed") + } + } + + { + b := g2.RawBytes() + if isCompressed(b[0]) { + 
t.Fatal("g2.RawBytes() should be uncompressed") + } + } + +} + +func TestG1AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG1AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G1] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != 
SizeOfG1AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G2] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + 
+ buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// define Gopters generators + +// GenFr generates an Fr element +func GenFr() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fr.Element + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenFp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenE3 generates an E3 elmt +func GenE3() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + GenFp(), + ).Map(func(values []interface{}) fptower.E3 { + return fptower.E3{A0: values[0].(fp.Element), A1: values[1].(fp.Element), A2: values[2].(fp.Element)} + }) +} + +// E6 generates an E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE3(), + GenE3(), + ).Map(func(values []interface{}) fptower.E6 { + return fptower.E6{B0: values[0].(fptower.E3), B1: values[1].(fptower.E3)} + }) +} + +// GenBigInt generates a big.Int +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fp.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} diff --git a/ecc/bw6-756/multiexp.go b/ecc/bw6-756/multiexp.go new file mode 100644 index 000000000..dc4306401 --- /dev/null 
+++ b/ecc/bw6-756/multiexp.go @@ -0,0 +1,983 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "errors" + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" + "math" + "runtime" +) + +// selector stores the index, mask and shifts needed to select bits from a scalar +// it is used during the multiExp algorithm or the batch scalar multiplication +type selector struct { + index uint64 // index in the multi-word scalar to select bits from + mask uint64 // mask (c-bit wide) + shift uint64 // shift needed to get our bits on low positions + + multiWordSelect bool // set to true if we need to select bits from 2 words (case where c doesn't divide 64) + maskHigh uint64 // same than mask, for index+1 + shiftHigh uint64 // same than shift, for index+1 +} + +// partitionScalars compute, for each scalars over c-bit wide windows, nbChunk digits +// if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract +// 2^{c} to the current digit, making it negative. 
+// negative digits can be processed in a later step as adding -G into the bucket instead of G +// (computing -G is cheap, and this saves us half of the buckets in the MultiExp or BatchScalarMul) +// scalarsMont indicates whether the provided scalars are in montgomery form +// returns smallValues, which represents the number of scalars which meets the following condition +// 0 < scalar < 2^c (in other words, scalars where only the c-least significant bits are non zero) +func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks int) ([]fr.Element, int) { + toReturn := make([]fr.Element, len(scalars)) + + // number of c-bit radixes in a scalar + nbChunks := fr.Limbs * 64 / c + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) // msb of the c-bit window + max := int(1 << (c - 1)) // max value we want for our digits + cDivides64 := (64 % c) == 0 // if c doesn't divide 64, we may need to select over multiple words + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := uint64(0); chunk < nbChunks; chunk++ { + jc := uint64(chunk * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = !cDivides64 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + + // for each chunk, we could track the number of non-zero points we will need to process + // this way, if a chunk has more work to do than others, we can spawn off more go routines + // (at the cost of more buckets allocated) + // a simplified approach is to track the small values where only the first word is set + // if this number represents a significant number of points, then we will split first chunk + //
processing in the msm in 2, to ensure all go routines finish at ~same time + // /!\ nbTasks is enough as parallel.Execute is not going to spawn more than nbTasks go routine + // if it does, though, this will deadlock. + chSmallValues := make(chan int, nbTasks) + + parallel.Execute(len(scalars), func(start, end int) { + smallValues := 0 + for i := start; i < end; i++ { + var carry int + + scalar := scalars[i] + if scalarsMont { + scalar.FromMont() + } + if scalar.IsUint64() { + // everything is 0, no need to process this scalar + if scalar[0] == 0 { + continue + } + // low c-bits are 1 in mask + if scalar[0]&mask == scalar[0] { + smallValues++ + } + } + + // for each chunk in the scalar, compute the current digit, and an eventual carry + for chunk := uint64(0); chunk < nbChunks; chunk++ { + s := selectors[chunk] + + // init with carry if any + digit := carry + carry = 0 + + // digit = value of the c-bit window + digit += int((scalar[s.index] & s.mask) >> s.shift) + + if s.multiWordSelect { + // we are selecting bits over 2 words + digit += int(scalar[s.index+1]&s.maskHigh) << s.shiftHigh + } + + // if digit is zero, no impact on result + if digit == 0 { + continue + } + + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and subtract + // 2^{c} to the current digit, making it negative.
+ if digit >= max { + digit -= (1 << c) + carry = 1 + } + + var bits uint64 + if digit >= 0 { + bits = uint64(digit) + } else { + bits = uint64(-digit-1) | msbWindow + } + + toReturn[i][s.index] |= (bits << s.shift) + if s.multiWordSelect { + toReturn[i][s.index+1] |= (bits >> s.shiftHigh) + } + + } + } + + chSmallValues <- smallValues + + }, nbTasks) + + // aggregate small values + close(chSmallValues) + smallValues := 0 + for o := range chSmallValues { + smallValues += o + } + return toReturn, smallValues +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Affine) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Affine, error) { + var _p G1Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Jac, error) { + // note: + // each of the msmCX method is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoritical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point don't. + + // for each msmCX + // step 1 + // we compute, for each scalars over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. 
+ // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets base on their selector and return the weighted bucket sum in given channel + // step 3 + // reduce the buckets weigthed sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 8, 16} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. 
+ // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG1Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G1Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG1Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG1Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG1Jac(p *G1Jac, c int, points []G1Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG1Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG1Affine(p *G1Jac, c int, chChunks []chan g1JacExtended) *G1Jac { + var _p g1JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := <-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG1Affine(chunk uint64, + chRes chan<- g1JacExtended, + buckets []g1JacExtended, + c uint64, + points []G1Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << 
nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... + n*bucket[n-1] + + var runningSum, total g1JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G1Jac) msmC4(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + 
chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC5(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 384, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], 
chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC8(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC16(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is 
an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Affine) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Affine, error) { + var _p G2Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Jac, error) { + // note: + // each of the msmCX method is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoritical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word 
size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point don't. + + // for each msmCX + // step 1 + // we compute, for each scalars over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. + // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets base on their selector and return the weighted bucket sum in given channel + // step 3 + // reduce the buckets weigthed sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 8, 16} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. 
+ // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. + // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG2Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G2Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG2Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG2Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG2Jac(p *G2Jac, c int, points []G2Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG2Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG2Affine(p *G2Jac, c int, chChunks []chan g2JacExtended) *G2Jac { + var _p g2JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := <-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG2Affine(chunk uint64, + chRes chan<- g2JacExtended, + buckets []g2JacExtended, + c uint64, + points []G2Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << 
nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... + n*bucket[n-1] + + var runningSum, total g2JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G2Jac) msmC4(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + 
chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC5(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 384, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], 
chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC8(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC16(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is 
an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} diff --git a/ecc/bw6-756/multiexp_test.go b/ecc/bw6-756/multiexp_test.go new file mode 100644 index 000000000..c95bfed83 --- /dev/null +++ b/ecc/bw6-756/multiexp_test.go @@ -0,0 +1,701 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "math/bits" + "runtime" + "sync" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestMultiExpG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G1Affine + var g G1Jac + g.Set(&g1Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same result as a non-splitted one.. + properties.Property("[G1] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G1Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G1] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G1Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G1Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G1] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G1] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G1Jac + g.Set(&g1Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G1Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + var op1MultiExp G1Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G1Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g1GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG1(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var t1, t2, t3 G1Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} + +func TestMultiExpG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G2Affine + var g G2Jac + g.Set(&g2Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same 
result as a non-splitted one.. + properties.Property("[G2] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G2Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G2] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G2Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G2Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G2] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G2] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G2Jac + g.Set(&g2Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G2Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + var op1MultiExp G2Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G2Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g2GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG2(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + 
for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g2GenAff + } + + var t1, t2, t3 G2Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} diff --git a/ecc/bw6-756/pairing.go b/ecc/bw6-756/pairing.go new file mode 100644 index 000000000..d1401819d --- /dev/null +++ b/ecc/bw6-756/pairing.go @@ -0,0 +1,366 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bw6756 + +import ( + "errors" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/internal/fptower" +) + +// GT target group of the pairing +type GT = fptower.E6 + +type lineEvaluation struct { + r0 fp.Element + r1 fp.Element + r2 fp.Element +} + +// Pair calculates the reduced pairing for a set of points +func Pair(P []G1Affine, Q []G2Affine) (GT, error) { + f, err := MillerLoop(P, Q) + if err != nil { + return GT{}, err + } + return FinalExponentiation(&f), nil +} + +// PairingCheck calculates the reduced pairing for a set of points and returns True if the result is One +func PairingCheck(P []G1Affine, Q []G2Affine) (bool, error) { + f, err := Pair(P, Q) + if err != nil { + return false, err + } + var one GT + one.SetOne() + return f.Equal(&one), nil +} + +// FinalExponentiation computes the final expo x**(c*(p**3-1)(p+1)(p**2-p+1)/r) +func FinalExponentiation(z *GT, _z ...*GT) GT { + + var result GT + result.Set(z) + + for _, e := range _z { + result.Mul(&result, e) + } + + var buf GT + + // easy part exponent: (p**3 - 1)*(p+1) + buf.Conjugate(&result) + result.Inverse(&result) + buf.Mul(&buf, &result) + result.Frobenius(&buf). + Mul(&result, &buf) + + // hard part exponent: 12(u+1)(p**2 - p + 1)/r + var m1, _m1, m2, _m2, m3, f0, f0_36, g0, g1, _g1, g2, g3, _g3, g4, _g4, g5, _g5, g6, gA, gB, g034, _g1g2, gC, h1, h2, h2g2C, h4 GT + m1.Expt(&result) + _m1.Conjugate(&m1) + m2.Expt(&m1) + _m2.Conjugate(&m2) + m3.Expt(&m2) + f0.Frobenius(&result). + Mul(&f0, &result). + Mul(&f0, &m2) + m2.CyclotomicSquare(&_m1) + f0.Mul(&f0, &m2) + f0_36.CyclotomicSquare(&f0). + CyclotomicSquare(&f0_36). + CyclotomicSquare(&f0_36). + Mul(&f0_36, &f0). + CyclotomicSquare(&f0_36). + CyclotomicSquare(&f0_36) + g0.Mul(&result, &m1). + Frobenius(&g0). + Mul(&g0, &m3). + Mul(&g0, &_m2). 
+ Mul(&g0, &_m1) + g1.Expt(&g0) + _g1.Conjugate(&g1) + g2.Expt(&g1) + g3.Expt(&g2) + _g3.Conjugate(&g3) + g4.Expt(&g3) + _g4.Conjugate(&g4) + g5.Expt(&g4) + _g5.Conjugate(&g5) + g6.Expt(&g5) + gA.Mul(&g3, &_g5). + CyclotomicSquare(&gA). + Mul(&gA, &g6). + Mul(&gA, &g1). + Mul(&gA, &g0) + g034.Mul(&g0, &g3). + Mul(&g034, &_g4) + gB.CyclotomicSquare(&g034). + Mul(&gB, &g034). + Mul(&gB, &g5). + Mul(&gB, &_g1) + _g1g2.Mul(&_g1, &g2) + gC.Mul(&_g3, &_g1g2). + CyclotomicSquare(&gC). + Mul(&gC, &_g1g2). + Mul(&gC, &g0). + CyclotomicSquare(&gC). + Mul(&gC, &g2). + Mul(&gC, &g0). + Mul(&gC, &g4) + // ht, hy = -1, -1 + // c1 = ht**2+3*hy**2 = 4 + h1.CyclotomicSquare(&gA). + CyclotomicSquare(&h1) + // c2 = ht+hy = -2 + h2.CyclotomicSquare(&gB). + Conjugate(&h2) + h2g2C.CyclotomicSquare(&gC). + Mul(&h2g2C, &h2) + h4.CyclotomicSquare(&h2g2C). + Mul(&h4, &h2g2C). + CyclotomicSquare(&h4) + result.Mul(&h1, &h4). + Mul(&result, &f0_36) + + return result +} + +// MillerLoop Optimal Tate alternative (or twisted ate or Eta revisited) +// Alg.2 in https://eprint.iacr.org/2021/1359.pdf +func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { + // check input size match + n := len(P) + if n == 0 || n != len(Q) { + return GT{}, errors.New("invalid inputs sizes") + } + + // filter infinity points + p0 := make([]G1Affine, 0, n) + q := make([]G2Affine, 0, n) + + for k := 0; k < n; k++ { + if P[k].IsInfinity() || Q[k].IsInfinity() { + continue + } + p0 = append(p0, P[k]) + q = append(q, Q[k]) + } + + n = len(q) + + // precomputations + pProj1 := make([]g1Proj, n) + p1 := make([]G1Affine, n) + p01 := make([]G1Affine, n) + p10 := make([]G1Affine, n) + pProj01 := make([]g1Proj, n) // P0+P1 + pProj10 := make([]g1Proj, n) // P0-P1 + l01 := make([]lineEvaluation, n) + l10 := make([]lineEvaluation, n) + for k := 0; k < n; k++ { + p1[k].Y.Neg(&p0[k].Y) + p1[k].X.Mul(&p0[k].X, &thirdRootOneG2) + pProj1[k].FromAffine(&p1[k]) + + // l_{p0,p1}(q) + pProj01[k].Set(&pProj1[k]) + 
pProj01[k].AddMixedStep(&l01[k], &p0[k]) + l01[k].r1.Mul(&l01[k].r1, &q[k].X) + l01[k].r0.Mul(&l01[k].r0, &q[k].Y) + + // l_{p0,-p1}(q) + pProj10[k].Neg(&pProj1[k]) + pProj10[k].AddMixedStep(&l10[k], &p0[k]) + l10[k].r1.Mul(&l10[k].r1, &q[k].X) + l10[k].r0.Mul(&l10[k].r0, &q[k].Y) + } + BatchProjectiveToAffineG1(pProj01, p01) + BatchProjectiveToAffineG1(pProj10, p10) + + // f_{a0+lambda*a1,P}(Q) + var result, ss GT + result.SetOne() + var l, l0 lineEvaluation + + var j int8 + + // i = 189 + for k := 0; k < n; k++ { + pProj1[k].DoubleStep(&l0) + l0.r1.Mul(&l0.r1, &q[k].X) + l0.r0.Mul(&l0.r0, &q[k].Y) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2) + } + + var tmp G1Affine + for i := 188; i >= 0; i-- { + result.Square(&result) + + j = loopCounter1[i]*3 + loopCounter0[i] + + for k := 0; k < n; k++ { + pProj1[k].DoubleStep(&l0) + l0.r1.Mul(&l0.r1, &q[k].X) + l0.r0.Mul(&l0.r0, &q[k].Y) + + switch j { + case -4: + tmp.Neg(&p01[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). + Mul(&result, &ss) + case -3: + tmp.Neg(&p1[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case -2: + pProj1[k].AddMixedStep(&l, &p10[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). 
+ Mul(&result, &ss) + case -1: + tmp.Neg(&p0[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case 0: + result.MulBy034(&l0.r0, &l0.r1, &l0.r2) + case 1: + pProj1[k].AddMixedStep(&l, &p0[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case 2: + tmp.Neg(&p10[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). + Mul(&result, &ss) + case 3: + pProj1[k].AddMixedStep(&l, &p1[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case 4: + pProj1[k].AddMixedStep(&l, &p01[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). + Mul(&result, &ss) + default: + return GT{}, errors.New("invalid loopCounter") + } + } + } + + return result, nil +} + +// DoubleStep doubles a point in Homogenous projective coordinates, and evaluates the line in Miller loop +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g1Proj) DoubleStep(evaluations *lineEvaluation) { + + // get some Element from our pool + var t1, A, B, C, D, E, EE, F, G, H, I, J, K fp.Element + A.Mul(&p.x, &p.y) + A.Halve() + B.Square(&p.y) + C.Square(&p.z) + D.Double(&C). + Add(&D, &C) + // E.Mul(&D, &bCurveCoeff) + E.Set(&D) + F.Double(&E). + Add(&F, &E) + G.Add(&B, &F) + G.Halve() + H.Add(&p.y, &p.z). + Square(&H) + t1.Add(&B, &C) + H.Sub(&H, &t1) + I.Sub(&E, &B) + J.Square(&p.x) + EE.Square(&E) + K.Double(&EE). + Add(&K, &EE) + + // X, Y, Z + p.x.Sub(&B, &F). + Mul(&p.x, &A) + p.y.Square(&G). 
+ Sub(&p.y, &K) + p.z.Mul(&B, &H) + + // Line evaluation + evaluations.r0.Neg(&H) + evaluations.r1.Double(&J). + Add(&evaluations.r1, &J) + evaluations.r2.Set(&I) +} + +// AddMixedStep point addition in Mixed Homogenous projective and Affine coordinates +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g1Proj) AddMixedStep(evaluations *lineEvaluation, a *G1Affine) { + + // get some Element from our pool + var Y2Z1, X2Z1, O, L, C, D, E, F, G, H, t0, t1, t2, J fp.Element + Y2Z1.Mul(&a.Y, &p.z) + O.Sub(&p.y, &Y2Z1) + X2Z1.Mul(&a.X, &p.z) + L.Sub(&p.x, &X2Z1) + C.Square(&O) + D.Square(&L) + E.Mul(&L, &D) + F.Mul(&p.z, &C) + G.Mul(&p.x, &D) + t0.Double(&G) + H.Add(&E, &F). + Sub(&H, &t0) + t1.Mul(&p.y, &E) + + // X, Y, Z + p.x.Mul(&L, &H) + p.y.Sub(&G, &H). + Mul(&p.y, &O). + Sub(&p.y, &t1) + p.z.Mul(&E, &p.z) + + t2.Mul(&L, &a.Y) + J.Mul(&a.X, &O). + Sub(&J, &t2) + + // Line evaluation + evaluations.r0.Set(&L) + evaluations.r1.Neg(&O) + evaluations.r2.Set(&J) +} diff --git a/ecc/bw6-756/pairing_test.go b/ecc/bw6-756/pairing_test.go new file mode 100644 index 000000000..7db065814 --- /dev/null +++ b/ecc/bw6-756/pairing_test.go @@ -0,0 +1,306 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestPairing(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + + genR1 := GenFr() + genR2 := GenFr() + + properties.Property("[BW6-756] Having the receiver as operand (final expo) should output the same result", prop.ForAll( + func(a GT) bool { + b := a + b = FinalExponentiation(&a) + a = FinalExponentiation(&a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Exponentiating FinalExpo(a) to r should output 1", prop.ForAll( + func(a GT) bool { + b := FinalExponentiation(&a) + return !a.IsInSubGroup() && b.IsInSubGroup() + }, + genA, + )) + + properties.Property("[BW6-756] Expt(Expt) and Exp(t^2) should output the same result in the cyclotomic subgroup", prop.ForAll( + func(a GT) bool { + var b, c, d GT + b.Conjugate(&a) + a.Inverse(&a) + b.Mul(&b, &a) + + a.Frobenius(&b). 
+ Mul(&a, &b) + + c.Expt(&a).Expt(&c) + d.Exp(&a, xGen).Exp(&d, xGen) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW6-756] bilinearity", prop.ForAll( + func(a, b fr.Element) bool { + + var res, resa, resb, resab, zero GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint, ab big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + ab.Mul(&abigint, &bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + res, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + resa, _ = Pair([]G1Affine{ag1}, []G2Affine{g2GenAff}) + resb, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{bg2}) + + resab.Exp(&res, ab) + resa.Exp(&resa, bbigint) + resb.Exp(&resb, abigint) + + return resab.Equal(&resa) && resab.Equal(&resb) && !res.Equal(&zero) + + }, + genR1, + genR2, + )) + + properties.Property("[BW6-756] MillerLoop of pairs should be equal to the product of MillerLoops", prop.ForAll( + func(a, b fr.Element) bool { + + var simpleProd, factorizedProd GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + P0 := []G1Affine{g1GenAff} + P1 := []G1Affine{ag1} + Q0 := []G2Affine{g2GenAff} + Q1 := []G2Affine{bg2} + + // FE( ML(a,b) * ML(c,d) * ML(e,f) * ML(g,h) ) + M1, _ := MillerLoop(P0, Q0) + M2, _ := MillerLoop(P1, Q0) + M3, _ := MillerLoop(P0, Q1) + M4, _ := MillerLoop(P1, Q1) + simpleProd.Mul(&M1, &M2).Mul(&simpleProd, &M3).Mul(&simpleProd, &M4) + simpleProd = FinalExponentiation(&simpleProd) + + tabP := []G1Affine{g1GenAff, ag1, g1GenAff, ag1} + tabQ := []G2Affine{g2GenAff, g2GenAff, bg2, bg2} + + // FE( ML([a,c,e,g] ; [b,d,f,h]) ) -> saves 3 squares in Fqk + factorizedProd, _ = Pair(tabP, tabQ) + + return simpleProd.Equal(&factorizedProd) + }, + genR1, + genR2, + )) + + properties.Property("[BW6-756] 
PairingCheck", prop.ForAll( + func(a, b fr.Element) bool { + + var g1GenAffNeg G1Affine + g1GenAffNeg.Neg(&g1GenAff) + tabP := []G1Affine{g1GenAff, g1GenAffNeg} + tabQ := []G2Affine{g2GenAff, g2GenAff} + + res, _ := PairingCheck(tabP, tabQ) + + return res + }, + genR1, + genR2, + )) + + properties.Property("[BW6-756] MillerLoop should skip pairs with a point at infinity", prop.ForAll( + func(a, b fr.Element) bool { + + var one GT + + var ag1, g1Inf G1Affine + var bg2, g2Inf G2Affine + + var abigint, bbigint big.Int + + one.SetOne() + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + g1Inf.FromJacobian(&g1Infinity) + g2Inf.FromJacobian(&g2Infinity) + + // e([0,c] ; [b,d]) + tabP := []G1Affine{g1Inf, ag1} + tabQ := []G2Affine{g2GenAff, bg2} + res1, _ := Pair(tabP, tabQ) + + // e([a,c] ; [0,d]) + tabP = []G1Affine{g1GenAff, ag1} + tabQ = []G2Affine{g2Inf, bg2} + res2, _ := Pair(tabP, tabQ) + + // e([0,c] ; [d,0]) + tabP = []G1Affine{g1Inf, ag1} + tabQ = []G2Affine{bg2, g2Inf} + res3, _ := Pair(tabP, tabQ) + + return res1.Equal(&res2) && !res2.Equal(&res3) && res3.Equal(&one) + }, + genR1, + genR2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkPairing(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkMillerLoop(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkFinalExponentiation(b *testing.B) { + + var a GT + a.SetRandom() + + 
b.ResetTimer() + for i := 0; i < b.N; i++ { + FinalExponentiation(&a) + } + +} + +func BenchmarkMultiMiller(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop(P, Q) + } + }) + } +} + +func BenchmarkMultiPair(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair(P, Q) + } + }) + } +} diff --git a/ecc/ecc.go b/ecc/ecc.go index cea2fb10d..3615cf261 100644 --- a/ecc/ecc.go +++ b/ecc/ecc.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). +// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315, bw6-633, BLS12-378 and BW6-756 elliptic curves implementation (+pairing). 
// // Also // @@ -45,11 +45,12 @@ const ( BLS24_315 BW6_761 BW6_633 + BW6_756 ) // Implemented return the list of curves fully implemented in gnark-crypto func Implemented() []ID { - return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315, BW6_633, BLS12_378} + return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315, BW6_633, BLS12_378, BW6_756} } func (id ID) String() string { @@ -69,6 +70,8 @@ func (id ID) String() string { return "bw6_633" case BLS24_315: return "bls24_315" + case BW6_756: + return "bw6_756" default: panic("unimplemented ecc ID") } @@ -93,6 +96,8 @@ func (id ID) Info() Info { return newInfo(&config.BW6_633) case BLS24_315: return newInfo(&config.BLS24_315) + case BW6_756: + return newInfo(&config.BW6_756) default: panic("unimplemented ecc ID") } diff --git a/ecc/ecc.md b/ecc/ecc.md index b394e4d72..9a6bfab85 100644 --- a/ecc/ecc.md +++ b/ecc/ecc.md @@ -6,6 +6,8 @@ * BW6-761 (EC supporting pairing on BLS12-377 field of definition) * BLS24-315 * BW6-633 (EC supporting pairing on BLS24-315 field of definition) +* BLS12-378 (GT-strong SNARK-friendly) +* BW6-756 (EC supporting pairing on BLS12-378 field of definition) ### Twisted edwards curves diff --git a/internal/generator/addchain/1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 b/internal/generator/addchain/1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 new file mode 100644 index 0000000000000000000000000000000000000000..6833575420e8c8195f285e34aebf84e69474299f GIT binary patch literal 6633 zcmXBY4`kKv9>DSEIe&I%l9^0*W+q7{lVmcw>y(bl5RJXBuSDa zNs=U)BuSDaNs=Tt-*Ql3AUQv3XW%Q=_+Ic3W675~VUl=DFBDb@Zy7hhK^x1~}kl`5&!ka9zA^mR?P z1-6B@X8)l@EwZ)R7Telv?Y0hEr)`OCsco5Uxvk5#!q#nDXg`l(*;YDUavzlqd2;$~}2U$~*JUly~J_DNp9fl&A7k%F}r| 
z<=uIA$}@Q;<<)sl%6s$Pl=tO*Deuqw;|V?x%kx}3xvljleKya}#=kopD~=zj;d-n%=3>QhBUT(YW5sbRRvfou#c?NA9Cu@-yeQA#i{XB(I3C1` z<6*3nD|!A=43A^Q@g!CpPh-XLELI%PW5w|zRva&5#qlau9Is=g+?40v#PBv&9PeVq z@jg}@A7aJvF;*O(V#V<}RvceqrMxcBe~sZ=tT?{MisMJDIDW>8<5#RWe#eUAPpmj< zu~Kf%wVYCHB}4_$fM`TCAr>GOBK$ecq6M#!n)gR7p86mbBiaz{hz?Tmhjp5lAeJJQ zA(kV$5GxSfh?R&{q~cg@?m?_U^di#CgO8#6`p<#AU=4Qt`^YYQBcJ zj+jH-K-@&!Lfj@5uiQK4yNG*;`-lgKhlodr$A~A0r=;R#{>=Ow@dEJ@@e1)8@do#a zT7GMOhj@?pfcQxLUH{4a8Sw@274Z%69q|M46Y+~wykCEt{~(HgB1#2Y2@yZgxWC5M zfM~=;y;f>6FF-6rG$VXXi&%tcMJy({Se4q$?T8LUCt?X=DPkF7Iid^K{#t2;xf`(( zu?n#o(Suk+{Soz=*CN&-`Vi|88xZ}7jU<<|(tvprVl!e8F+_0%Ee)GT5L*zVh%v-g z#5Tlse6FaK#?2Fm9f+NXU5H7<6k-~&o8mH4nlbM|>_zND>_;3x97G&K%pwj`T$xKp z%tsN&5XTWG5GN6*5U25(qgFa&K8rYqIFGo1xQMt!{h?hpUqM_&Tti$(%pq=2|GC~Y z-$L9*+(Fz$+(X<)JU~1|Jfi-ReQbV$c#3$2c#e31c!_w0cun!bKr|wn5DO3s5zUAe z#3Dp1Vll-HBz{SVZbx(=I!UEkd5LK$Vi{sNBCajvF0lg9jaZ3Tg;U^Mc8a@PdE7jK*nx=a zOL?c*g_uN4QU89Y&ASmZh&_nCh<%9t6mO040rNq`A;c`=FyaW}DB>96IN}82B;pjw z=hX6P^BKfh#5u%y#08R%!R3qQONh&eD~PLzYt&zk*UfW?8;F~TTZr3;JBYi8dx-lK zm&Nh}^FzcV#AC!0#8bpG#B+-4WBG;oCE^w0HR27$t*iXj{0{LR5m%S;2k{Z{3Go^6 z1@RT}4e=fE1Hb0h%0JD&5Wf+B5H%ty1zQPGq496IN}82B;pj}G{yC>a>jfX zaSm}FaRG4=aS3sm;=)+DV!n#FhPaNHL)<{zq`2o+ZkcZ*?jY_W?ji0Y9v~hf9w8nh zo*S;s@d< z;uqpK;t!%maqq6Cn5!j31<`?BxZ$R`THX;TPn-H53 zgNPx-Fk*z{Z;z^5%yD(8j*2nFR_fovHuHAGIAQ{^1F;ja3o(h9LQEreQ~dQ#b;i60 z5!aXMUa=3cA8`P25OIiPTJ zsu#_d5SI~G5LXe`5Z4iNh#M3i6RS7Pw-C1xcPK7A)w||(V(T?aqbRw1@mLirR UmQ#FQXy`JpKy)KkB36<955X2p+5i9m literal 0 HcmV?d00001 diff --git a/internal/generator/addchain/7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 b/internal/generator/addchain/7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 new file mode 100644 index 
0000000000000000000000000000000000000000..d790b3f46ea5711f6eeb322c90ad8e05d6a8c1e5 GIT binary patch literal 7376 zcmXBY4`kKv9>DSEIe&JOWG0iDBuSDaGc%b<(oJ?}W|EoRncd8OyE92v`Uv-*4rdvva=ZY|ZokE#>^T-syk;Yxls-#jw$_;seuUlwa zWLs=&^go)^5?ixvsjbD2gD%Kf=N<$*kq@|wIO z<(+wF%7b|@<)J*3@^Btbc_fddJeo&S9?N4Xx943ckLU4}C-OwflX)`bsXUePbe>Lm zcix@yp1ddJw!AmxnLLy7Y@SVdF3-gSyf2o=xp;7!>ksRB;omg?)jg|6}Jby2S`?2D95G#&{ zu~M$&`A0E4jupp~SaCd!700t!aXgO|$BS5Tyo?pct5|Woj+OGlJpU$!x3S`Q7b}kU zvEuj;D~^w`;`kIRj?b~;_!2ASt~~!WhHtUr_#P{cAF<;287q!ovEuk0D~><0;;6++ zxiQyrO0ks?6+{DK0b(Iy5n?gIztbq1@EoamebnNi4`L~z1<{IFMk@ZX<>nQLm55b{ z)rd8SwTN|y^@t6m;%GCsBRUYBh>eIYL^t(2vdO#|u?4Xeu??}ERNU1bb1$L~(T^BF z>_F^93?ha|#T^?qk03@7V~Aadal`~-5;29CrvA9>Ht#{~Ma&>(5p#%ri2bDEdvw5j z5OD}`7;yw~6mg7HJnYBKClDtQr|=4^<M6zh|8qnnR~^26>$x5 z9dQG36LAZ1n^Zh=@0jl*?ji0Y9v~hf9w8nho*yo`rG^mQT&N06>KF$d_&{?HMR!C z0$kKsu>!FYu?n#ou?E-vT4}9$9b!FV z1ELMlj_9C%k2=j85nYIG#3saM#1_O>lFM0Xn|V8;2hoe@qqu^W`ppA~9f+NXLBtSZ z7%_s+6}8f+c?_`&F^-r(Od_Tb(}>*^mzmNY^IpUZViqxn*oWAUIDj~aI7D$}E*&-> zK^#RKLmWq(K%7LJ!e@?J>9qL_;w<7E;ymI4;v)6CcFBAhaRqS|aSd@DafABz^``k2 z;x^(A;x6JI;y&U5;vwP@^{4D(^Ap5V#52Tm#0$hr#4E&WiWhh3jrlF&9pXLW1L7m% z6ZNOkXY&`tSHw5Ocf=3GPsA_8Z;H2z(jRk;h;qSJqIkO~SIiBF1&D=+MTo_SMnn^0 z38ERXl;RDf++uD;EJG|Om1^Y`rj>|Qh}DR=wv^Y1wTN|y^@t6KHbgt31JOzG4qV=7 z?m~1UHX$}6wjj1ryo$=(%-az?h+ae=q8~AU*n!xI7(@&qh7lta?{ei)^B7_mBCapx zaWR3IL`+e?z0>C1h&_nCh#ABzVvgdaQQl|Xk2ruhh&Y5ej5vZgia3TijyQohN%A?h ze9C+paRzY~aSm~wT<6kAl@S0A>JcCAU+~KQT(7(`E34z_=@<3_>TC2_=)(1 z_>K63s8PIkS5wT@5~6}=KrBEkL@c8CXjWZpZbUR8mLQrDODXPAwZ+_uScX`RSbb(48BVhds`VjE&Rq6g87 z=tJ})21tH;RNY~Yt4npK7(@(FzX`+U5yU8B46zF_j+j79BBl`2h}{&w-l^^}??uG* zr8*;K5p#%ri2aBIq`W+igXTks!-yl)@5oW}F~o7i3B*anDa2{S8N^w{ImCI2x0vb$ z^F_oZ#AU=4#8t#K#C60CijRrao90`H+lV_97oO@}^F73U!~?`b#3RIG#1q6*#52Tm z#0$hr#4E&WiqAOJH|Dp9cZm0h4~UP5PZU1`RzI7+Aig5LA-*GiP=D?HH2*^UM*Kn4 zC?24O6mvrfQ9(2y79bWP79kcR8WBzSopY^WiMg5hk*8s)sRhxBScX`RSb_hBF96%gI96}sM z96=mK977yOoIspJoI;#NoI#vLoI{*PTtHkzTtZw%TtQq#Tti$(+(6t!+(O(&+(Fz$ 
f+(X<)JU~1|JVHE1JV87~JVQK3yg6->12 over fp func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - if conf.Equal(config.BW6_761) || conf.Equal(config.BW6_633) || conf.Equal(config.BLS24_315) { + if conf.Equal(config.BW6_756) || conf.Equal(config.BW6_761) || conf.Equal(config.BW6_633) || conf.Equal(config.BLS24_315) { return nil } From c2ada0d25c1f49d50b4de9e2adf3060ae9c9d9df Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 28 Dec 2021 12:26:34 +0100 Subject: [PATCH 07/29] build: templates for bw6-756 --- ecc/bw6-633/g1.go | 1 + ecc/bw6-633/g2.go | 1 + ecc/bw6-756/fr/fft/fft.go | 3 +- ecc/bw6-756/fr/fft/fuzz.go | 1 + ecc/bw6-756/g1.go | 2 - ecc/bw6-756/g1_test.go | 6 ++- ecc/bw6-756/g2.go | 1 - ecc/bw6-756/g2_test.go | 6 ++- .../crypto/signature/eddsa/generate.go | 4 ++ .../signature/eddsa/template/eddsa.go.tmpl | 8 +-- internal/generator/ecc/template/point.go.tmpl | 54 ++++++++++++++++++- .../ecc/template/tests/marshal.go.tmpl | 2 +- .../ecc/template/tests/point.go.tmpl | 2 +- internal/generator/edwards/generate.go | 4 ++ .../generator/fft/template/domain.go.tmpl | 3 ++ .../generator/fft/template/imports.go.tmpl | 4 ++ .../pairing/template/tests/pairing.go.tmpl | 4 +- 17 files changed, 89 insertions(+), 17 deletions(-) diff --git a/ecc/bw6-633/g1.go b/ecc/bw6-633/g1.go index 17baa6bec..0c9ca84df 100644 --- a/ecc/bw6-633/g1.go +++ b/ecc/bw6-633/g1.go @@ -531,6 +531,7 @@ func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { p.phi(&L1).AddAssign(&L0) return p + } // ------------------------------------------------------------------------------------------------- diff --git a/ecc/bw6-633/g2.go b/ecc/bw6-633/g2.go index c28ec1c5e..831624a1f 100644 --- a/ecc/bw6-633/g2.go +++ b/ecc/bw6-633/g2.go @@ -529,6 +529,7 @@ func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { p.phi(&L1).AddAssign(&L0) return p + } // ------------------------------------------------------------------------------------------------- diff --git 
a/ecc/bw6-756/fr/fft/fft.go b/ecc/bw6-756/fr/fft/fft.go index 503f375ba..290c071a2 100644 --- a/ecc/bw6-756/fr/fft/fft.go +++ b/ecc/bw6-756/fr/fft/fft.go @@ -21,8 +21,9 @@ import ( "runtime" "github.com/consensys/gnark-crypto/ecc" - "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" "github.com/consensys/gnark-crypto/internal/parallel" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" ) // Decimation is used in the FFT call to select decimation in time or in frequency diff --git a/ecc/bw6-756/fr/fft/fuzz.go b/ecc/bw6-756/fr/fft/fuzz.go index 1c35691b5..98552dcf3 100644 --- a/ecc/bw6-756/fr/fft/fuzz.go +++ b/ecc/bw6-756/fr/fft/fuzz.go @@ -23,6 +23,7 @@ import ( "bytes" "fmt" "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" ) diff --git a/ecc/bw6-756/g1.go b/ecc/bw6-756/g1.go index 2a1f0a5bf..f3ac868b6 100644 --- a/ecc/bw6-756/g1.go +++ b/ecc/bw6-756/g1.go @@ -506,7 +506,6 @@ func (p *G1Affine) ClearCofactor(a *G1Affine) *G1Affine { // ClearCofactor maps a point in E(Fp) to E(Fp)[r] func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { - var L0, L1, uP, u2P, u3P, tmp G1Jac uP.ScalarMultiplication(a, &xGen) @@ -533,7 +532,6 @@ func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { AddAssign(&L0) return p - } // ------------------------------------------------------------------------------------------------- diff --git a/ecc/bw6-756/g1_test.go b/ecc/bw6-756/g1_test.go index a38dbeb3a..06d5277a6 100644 --- a/ecc/bw6-756/g1_test.go +++ b/ecc/bw6-756/g1_test.go @@ -38,7 +38,8 @@ func TestG1AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( func(a fp.Element) bool { var p, res1, res2 G1Jac - p = fuzzJacobianG1Affine(&g1Gen, a) + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) res1.phi(&p) res2.mulWindowed(&p, &lambdaGLV) @@ -50,7 +51,8 @@ func TestG1AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( func(a 
fp.Element) bool { var p, res, tmp G1Jac - p = fuzzJacobianG1Affine(&g1Gen, a) + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) tmp.phi(&p) res.phi(&tmp). AddAssign(&tmp). diff --git a/ecc/bw6-756/g2.go b/ecc/bw6-756/g2.go index 934f7d6f1..d7fbb9659 100644 --- a/ecc/bw6-756/g2.go +++ b/ecc/bw6-756/g2.go @@ -524,7 +524,6 @@ func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { AddAssign(&L1) return p - } // ------------------------------------------------------------------------------------------------- diff --git a/ecc/bw6-756/g2_test.go b/ecc/bw6-756/g2_test.go index c2fe39936..b068b6d3a 100644 --- a/ecc/bw6-756/g2_test.go +++ b/ecc/bw6-756/g2_test.go @@ -38,7 +38,8 @@ func TestG2AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( func(a fp.Element) bool { var p, res1, res2 G2Jac - p = fuzzJacobianG2Affine(&g2Gen, a) + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) res1.phi(&p) res2.mulWindowed(&p, &lambdaGLV) @@ -50,7 +51,8 @@ func TestG2AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( func(a fp.Element) bool { var p, res, tmp G2Jac - p = fuzzJacobianG2Affine(&g2Gen, a) + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) tmp.phi(&p) res.phi(&tmp). AddAssign(&tmp). 
diff --git a/internal/generator/crypto/signature/eddsa/generate.go b/internal/generator/crypto/signature/eddsa/generate.go index 1422a4137..dd4f82519 100644 --- a/internal/generator/crypto/signature/eddsa/generate.go +++ b/internal/generator/crypto/signature/eddsa/generate.go @@ -8,6 +8,10 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { + if conf.Equal(config.BW6_756) { + return nil + } + // eddsa conf.Package = "eddsa" entries := []bavard.Entry{ diff --git a/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl b/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl index da28a4b4e..7d1262c0e 100644 --- a/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl +++ b/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl @@ -52,7 +52,7 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { var pub PublicKey var priv PrivateKey - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} // The source of randomness and the secret scalar must come // from 2 distincts sources. 
Since the scalar is the size of the // field of definition (48 bytes), the scalar must come from a @@ -87,7 +87,7 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { // prune the key // https://tools.ietf.org/html/rfc8032#section-5.1.5, key generation - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} h1[0] &= 0xF8 h1[sizeFr-1] &= 0x7F h1[sizeFr-1] |= 0x40 @@ -100,14 +100,14 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { // reverse first bytes because setBytes interpret stream as big endian // but in eddsa specs s is the first 32 bytes in little endian for i, j := 0, sizeFr; i < j; i, j = i+1, j-1 { - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} h1[i], h1[j] = h1[j], h1[i] {{ else }} h[i], h[j] = h[j], h[i] {{ end }} } - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} copy(priv.scalar[:], h1[:sizeFr]) {{ else }} copy(priv.scalar[:], h[:sizeFr]) diff --git a/internal/generator/ecc/template/point.go.tmpl b/internal/generator/ecc/template/point.go.tmpl index 1c7eae8a3..d536b51a0 100644 --- a/internal/generator/ecc/template/point.go.tmpl +++ b/internal/generator/ecc/template/point.go.tmpl @@ -416,7 +416,7 @@ func (p *{{ $TJacobian }}) IsOnCurve() bool { } {{- end}} -{{else if eq .Name "bw6-761"}} +{{else if or (eq .Name "bw6-761") (eq .Name "bw6-756")}} // IsInSubGroup returns true if p is on the r-torsion, false otherwise. // Z[r,0]+Z[-lambda{{ $TAffine }}, 1] is the kernel // of (u,v)->u+lambda{{ $TAffine }}v mod r. 
Expressing r, lambda{{ $TAffine }} as @@ -749,6 +749,33 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { p.phi(&L1).AddAssign(&L0) return p +{{else if eq .Name "bw6-756"}} + var L0, L1, uP, u2P, u3P, tmp G1Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + + L0.Set(a).AddAssign(&u3P). + SubAssign(&u2P) + tmp.Set(a).AddAssign(&u2P). + SubAssign(&uP). + SubAssign(&uP). + Double(&tmp) + L0.SubAssign(&tmp). + SubAssign(a) + + L1.Set(a).AddAssign(&uP) + tmp.Set(&uP).SubAssign(a). + Double(&tmp). + SubAssign(&u2P) + L1.AddAssign(&tmp). + SubAssign(a) + + p.phi(&L1). + AddAssign(&L0) + + return p {{- end}} } {{ else }} @@ -934,6 +961,31 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { p.phi(&L1).AddAssign(&L0) return p +{{else if eq .Name "bw6-756"}} + + var L0, L1, uP, u2P, u3P, tmp G2Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + // ht=-2, hy=0 + // d1=1, d2=-1, d3=-1 + + L0.Set(a). + AddAssign(&u2P). + SubAssign(&uP) + tmp.Set(&u2P). + AddAssign(a). + SubAssign(&uP). + Double(&tmp) + L1.Set(&u3P). + SubAssign(&tmp) + + p.phi(&L0). 
+ AddAssign(&L1) + + return p + {{- end}} } {{- end}} diff --git a/internal/generator/ecc/template/tests/marshal.go.tmpl b/internal/generator/ecc/template/tests/marshal.go.tmpl index 11ba40f79..51a117544 100644 --- a/internal/generator/ecc/template/tests/marshal.go.tmpl +++ b/internal/generator/ecc/template/tests/marshal.go.tmpl @@ -345,7 +345,7 @@ func GenFp() gopter.Gen { // e2 e4 e12 e24 for bls24 // e2 e6 e12 else */}} -{{if or (eq .Name "bw6-633") (eq .Name "bw6-761")}} +{{if or (eq .Name "bw6-633") (eq .Name "bw6-761") (eq .Name "bw6-756")}} // GenE3 generates an E3 elmt func GenE3() gopter.Gen { return gopter.CombineGens( diff --git a/internal/generator/ecc/template/tests/point.go.tmpl b/internal/generator/ecc/template/tests/point.go.tmpl index 1e9aa3695..3d29df278 100644 --- a/internal/generator/ecc/template/tests/point.go.tmpl +++ b/internal/generator/ecc/template/tests/point.go.tmpl @@ -64,7 +64,7 @@ import ( )) {{if eq .PointName "g2" }} - {{- if and (eq .PointName "g2") (ne .Name "bw6-761") (ne .Name "bw6-633") }} + {{- if and (eq .PointName "g2") (ne .Name "bw6-761") (ne .Name "bw6-633") (ne .Name "bw6-756") }} properties.Property("[{{ toUpper .Name }}] check that psi^2(P) = -phi(P)", prop.ForAll( func(a {{ .CoordType}}) bool { var p, res1, res2 {{ $TJacobian }} diff --git a/internal/generator/edwards/generate.go b/internal/generator/edwards/generate.go index 45f6bf1bd..b2ea5b5e1 100644 --- a/internal/generator/edwards/generate.go +++ b/internal/generator/edwards/generate.go @@ -8,6 +8,10 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { + if conf.Equal(config.BW6_756) { + return nil + } + conf.Package = "twistededwards" entries := []bavard.Entry{ diff --git a/internal/generator/fft/template/domain.go.tmpl b/internal/generator/fft/template/domain.go.tmpl index 269adacb0..73366c30b 100644 --- a/internal/generator/fft/template/domain.go.tmpl +++ b/internal/generator/fft/template/domain.go.tmpl @@ -78,6 
+78,9 @@ func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { {{else if eq .Name "bw6-761"}} rootOfUnity.SetString("32863578547254505029601261939868325669770508939375122462904745766352256812585773382134936404344547323199885654433") const maxOrderRoot uint64 = 46 + {{else if eq .Name "bw6-756"}} + rootOfUnity.SetString("199251335866470442271346949249090720992237796757894062992204115206570647302191425225605716521843542790404563904580") + const maxOrderRoot uint64 = 41 {{else if eq .Name "bw6-633"}} rootOfUnity.SetString("4991787701895089137426454739366935169846548798279261157172811661565882460884369603588700158257") const maxOrderRoot uint64 = 20 diff --git a/internal/generator/fft/template/imports.go.tmpl b/internal/generator/fft/template/imports.go.tmpl index f2b26bcc7..858fe8afe 100644 --- a/internal/generator/fft/template/imports.go.tmpl +++ b/internal/generator/fft/template/imports.go.tmpl @@ -10,6 +10,8 @@ "github.com/consensys/gnark-crypto/ecc/bn254/fr" {{ else if eq .Name "bw6-761"}} "github.com/consensys/gnark-crypto/ecc/bw6-761/fr" +{{ else if eq .Name "bw6-756"}} + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" {{ else if eq .Name "bw6-633"}} "github.com/consensys/gnark-crypto/ecc/bw6-633/fr" {{ else if eq .Name "bls24-315"}} @@ -29,6 +31,8 @@ curve "github.com/consensys/gnark-crypto/ecc/bn254" {{else if eq .Name "bw6-761"}} curve "github.com/consensys/gnark-crypto/ecc/bw6-761" +{{else if eq .Name "bw6-756"}} + curve "github.com/consensys/gnark-crypto/ecc/bw6-756" {{else if eq .Name "bw6-633"}} curve "github.com/consensys/gnark-crypto/ecc/bw6-633" {{ else if eq .Name "bls24-315"}} diff --git a/internal/generator/pairing/template/tests/pairing.go.tmpl b/internal/generator/pairing/template/tests/pairing.go.tmpl index 09a0840a9..b4a47335b 100644 --- a/internal/generator/pairing/template/tests/pairing.go.tmpl +++ b/internal/generator/pairing/template/tests/pairing.go.tmpl @@ -18,7 +18,7 @@ func TestPairing(t *testing.T) { properties := 
gopter.NewProperties(parameters) - {{if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} genA := GenE6() {{else if eq .Name "bls24-315"}} genA := GenE24() @@ -52,7 +52,7 @@ func TestPairing(t *testing.T) { b.Conjugate(&a) a.Inverse(&a) b.Mul(&b, &a) - {{if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} a.Frobenius(&b). {{else if eq .Name "bls24-315"}} a.FrobeniusQuad(&b). From abbe7591b5eef9fd6de6210f33e004e2f8d06bcb Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 31 Dec 2021 15:27:58 +0100 Subject: [PATCH 08/29] feat(bw6-756): add companion twisted Edwards --- README.md | 18 +- ecc/bw6-756/twistededwards/doc.go | 18 + ecc/bw6-756/twistededwards/eddsa/doc.go | 22 + ecc/bw6-756/twistededwards/eddsa/eddsa.go | 274 +++++++++++ .../twistededwards/eddsa/eddsa_test.go | 208 ++++++++ ecc/bw6-756/twistededwards/eddsa/marshal.go | 133 +++++ ecc/bw6-756/twistededwards/point.go | 411 ++++++++++++++++ ecc/bw6-756/twistededwards/twistededwards.go | 63 +++ .../twistededwards/twistededwards_test.go | 456 ++++++++++++++++++ ecc/ecc.go | 2 +- hash/hashes.go | 7 + .../crypto/signature/eddsa/generate.go | 4 - internal/generator/edwards/generate.go | 4 - signature/signature.go | 3 +- 14 files changed, 1604 insertions(+), 19 deletions(-) create mode 100644 ecc/bw6-756/twistededwards/doc.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/doc.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/eddsa.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/eddsa_test.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/marshal.go create mode 100644 ecc/bw6-756/twistededwards/point.go create mode 100644 ecc/bw6-756/twistededwards/twistededwards.go create mode 100644 ecc/bw6-756/twistededwards/twistededwards_test.go diff --git a/README.md b/README.md index c7fadb01d..832b33bb2 100644 --- a/README.md +++ b/README.md @@ -3,17 
+3,17 @@ [![License](https://img.shields.io/badge/license-Apache%202-blue)](LICENSE) [![Go Report Card](https://goreportcard.com/badge/github.com/ConsenSys/gnark-crypto)](https://goreportcard.com/badge/github.com/ConsenSys/gnark-crypto) [![PkgGoDev](https://pkg.go.dev/badge/mod/github.com/consensys/gnark-crypto)](https://pkg.go.dev/mod/github.com/consensys/gnark-crypto) `gnark-crypto` provides: -* [Elliptic curve cryptography](ecc/ecc.md) (+pairing) on BN254, BLS12-381, BLS12-377, BW6-761, BLS24-315 and BW6-633 +* [Elliptic curve cryptography](ecc/ecc.md) (+pairing) on BN254, BLS12-381, BLS12-377, BW6-761, BLS24-315, BW6-633, BLS12-378 and BW6-756 * [Finite field arithmetic](field/field.md) (fast big.Int) * FFT * Polynomial commitment schemes * MiMC * EdDSA (on the "companion" twisted edwards curves) - + `gnark-crypto` is actively developed and maintained by the team (zkteam@consensys.net | [HackMD](https://hackmd.io/@zkteam)) behind: -* [`gnark`: a framework to execute (and verify) algorithms in zero-knowledge](https://github.com/consensys/gnark) +* [`gnark`: a framework to execute (and verify) algorithms in zero-knowledge](https://github.com/consensys/gnark) ## Warning @@ -28,7 +28,7 @@ `gnark-crypto` is tested with the last 2 major releases of Go (1.16 and 1.17). -### Install `gnark-crypto` +### Install `gnark-crypto` ```bash go get github.com/consensys/gnark-crypto @@ -44,27 +44,27 @@ The APIs are consistent accross the curves. For example, [here is `bn254` godoc] ### Development -Most (but not all) of the code is generated from the templates in `internal/generator`. +Most (but not all) of the code is generated from the templates in `internal/generator`. The generated code contains little to no interfaces and is strongly typed with a base field (generated by the `gnark-crypto/field`). The two main factors driving this design choice are: -1. Performance: `gnark-crypto` algorithms manipulates millions (if not billions) of field elements. 
Interface indirection at this level, plus garbage collection indexing takes a heavy toll on perf. +1. Performance: `gnark-crypto` algorithms manipulates millions (if not billions) of field elements. Interface indirection at this level, plus garbage collection indexing takes a heavy toll on perf. 2. No generics in Go: need to derive (mostly) identical code for various moduli and curves, with consistent APIs To regenerate the files, see `internal/generator/main.go`. Run: ``` go generate ./internal/... -``` +``` ## Benchmarks -[Benchmarking pairing-friendly elliptic curves libraries](https://hackmd.io/@zkteam/eccbench) +[Benchmarking pairing-friendly elliptic curves libraries](https://hackmd.io/@zkteam/eccbench) >The libraries are implemented in different languages and some use more assembly code than others. Besides the different algorithmic and software optimizations used across, it should be noted also that some libraries target constant-time implementation for some operations making it de facto slower. However, it can be clear that consensys/gnark-crypto is one of the fastest pairing-friendly elliptic curve libraries to be used in zkp projects with different curves. ## Versioning -We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://github.com/consensys/gnark-crypto/tags). +We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://github.com/consensys/gnark-crypto/tags). ## License diff --git a/ecc/bw6-756/twistededwards/doc.go b/ecc/bw6-756/twistededwards/doc.go new file mode 100644 index 000000000..771de3887 --- /dev/null +++ b/ecc/bw6-756/twistededwards/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package twistededwards provides bw6-756's twisted edwards "companion curve" defined on fr. +package twistededwards diff --git a/ecc/bw6-756/twistededwards/eddsa/doc.go b/ecc/bw6-756/twistededwards/eddsa/doc.go new file mode 100644 index 000000000..65fdfe7af --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/doc.go @@ -0,0 +1,22 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package eddsa provides EdDSA signature scheme on bw6-756's twisted edwards curve. +// +// See also +// +// https://en.wikipedia.org/wiki/EdDSA +package eddsa diff --git a/ecc/bw6-756/twistededwards/eddsa/eddsa.go b/ecc/bw6-756/twistededwards/eddsa/eddsa.go new file mode 100644 index 000000000..f5ca9d161 --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/eddsa.go @@ -0,0 +1,274 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "errors" + "hash" + "io" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/twistededwards" + "github.com/consensys/gnark-crypto/signature" + "golang.org/x/crypto/blake2b" +) + +var errNotOnCurve = errors.New("point not on curve") + +const ( + sizeFr = fr.Bytes + sizePublicKey = sizeFr + sizeSignature = 2 * sizeFr + sizePrivateKey = 2*sizeFr + 32 +) + +// PublicKey eddsa signature object +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type PublicKey struct { + A twistededwards.PointAffine +} + +// PrivateKey private key of an eddsa instance +type PrivateKey struct { + PublicKey PublicKey // copy of the associated public key + scalar [sizeFr]byte // secret scalar, in big Endian + randSrc [32]byte // source +} + +// Signature represents an eddsa signature +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type Signature struct { + R twistededwards.PointAffine + S [sizeFr]byte +} + +func init() { + signature.Register(signature.EDDSA_BW6_756, GenerateKeyInterfaces) +} + +// GenerateKey generates a public and private key pair. 
+func GenerateKey(r io.Reader) (PrivateKey, error) { + + c := twistededwards.GetEdwardsCurve() + + var pub PublicKey + var priv PrivateKey + + // The source of randomness and the secret scalar must come + // from 2 distinct sources. Since the scalar is the size of the + // field of definition (48 bytes), the scalar must come from a + // different digest so there is no overlap between the source of + // randomness and the scalar. + + // used for random scalar (aka private key) + seed := make([]byte, 32) + _, err := r.Read(seed) + if err != nil { + return priv, err + } + h1 := blake2b.Sum512(seed[:]) + + // used for the source of randomness when hashing the message + h2 := blake2b.Sum512(h1[:]) + for i := 0; i < 32; i++ { + priv.randSrc[i] = h2[i] + } + + // prune the key + // https://tools.ietf.org/html/rfc8032#section-5.1.5, key generation + + h1[0] &= 0xF8 + h1[sizeFr-1] &= 0x7F + h1[sizeFr-1] |= 0x40 + + // reverse first bytes because setBytes interprets the stream as big endian + // but in eddsa specs s is the first 32 bytes in little endian (NOTE(review): loop bound sizeFr also swaps h1[sizeFr]; sizeFr-1 looks intended — confirm against rfc8032 pruning) + for i, j := 0, sizeFr; i < j; i, j = i+1, j-1 { + + h1[i], h1[j] = h1[j], h1[i] + + } + + copy(priv.scalar[:], h1[:sizeFr]) + + var bscalar big.Int + bscalar.SetBytes(priv.scalar[:]) + pub.A.ScalarMul(&c.Base, &bscalar) + + priv.PublicKey = pub + + return priv, nil +} + +// GenerateKeyInterfaces generates interfaces for the public/private key. +// The purpose of this function is to be registered in the list of signature schemes. +func GenerateKeyInterfaces(r io.Reader) (signature.Signer, error) { + priv, err := GenerateKey(r) + return &priv, err +} + +// Equal compares 2 public keys +func (pub *PublicKey) Equal(other signature.PublicKey) bool { + bpk := pub.Bytes() + bother := other.Bytes() + return subtle.ConstantTimeCompare(bpk, bother) == 1 +} + +// Public returns the public key associated to the private key. +// From Signer interface defined in gnark/crypto/signature.
+func (privKey *PrivateKey) Public() signature.PublicKey { + var pub PublicKey + pub.A.Set(&privKey.PublicKey.A) + return &pub +} + +// Sign sign a message +// Pure Eddsa version (see https://tools.ietf.org/html/rfc8032#page-8) +func (privKey *PrivateKey) Sign(message []byte, hFunc hash.Hash) ([]byte, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + var res Signature + + // blinding factor for the private key + // blindingFactorBigInt must be the same size as the private key, + // blindingFactorBigInt = h(randomness_source||message)[:sizeFr] + var blindingFactorBigInt big.Int + + // randSrc = privKey.randSrc || msg (-> message = MSB message .. LSB message) + randSrc := make([]byte, 32+len(message)) + for i, v := range privKey.randSrc { + randSrc[i] = v + } + copy(randSrc[32:], message) + + // randBytes = H(randSrc) + blindingFactorBytes := blake2b.Sum512(randSrc[:]) // TODO ensures that the hash used to build the key and the one used here is the same + blindingFactorBigInt.SetBytes(blindingFactorBytes[:sizeFr]) + + // compute R = randScalar*Base + res.R.ScalarMul(&curveParams.Base, &blindingFactorBigInt) + if !res.R.IsOnCurve() { + return nil, errNotOnCurve + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + resRX := res.R.X.Bytes() + resRY := res.R.Y.Bytes() + resAX := privKey.PublicKey.A.X.Bytes() + resAY := privKey.PublicKey.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], resRX[:]) + copy(dataToHash[sizeFr:], resRY[:]) + copy(dataToHash[2*sizeFr:], resAX[:]) + copy(dataToHash[3*sizeFr:], resAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + _, err := hFunc.Write(dataToHash[:]) + if err != nil { + return nil, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // Compute s = randScalarInt + H(R,A,M)*S + // going with big int to do ops mod curve order + var bscalar, bs big.Int + 
bscalar.SetBytes(privKey.scalar[:]) + bs.Mul(&hramInt, &bscalar). + Add(&bs, &blindingFactorBigInt). + Mod(&bs, &curveParams.Order) + sb := bs.Bytes() + if len(sb) < sizeFr { + offset := make([]byte, sizeFr-len(sb)) + sb = append(offset, sb...) + } + copy(res.S[:], sb[:]) + + return res.Bytes(), nil +} + +// Verify verifies an eddsa signature +func (pub *PublicKey) Verify(sigBin, message []byte, hFunc hash.Hash) (bool, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + // verify that pubKey and R are on the curve + if !pub.A.IsOnCurve() { + return false, errNotOnCurve + } + + // Deserialize the signature + var sig Signature + if _, err := sig.SetBytes(sigBin); err != nil { + return false, err + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + sigRX := sig.R.X.Bytes() + sigRY := sig.R.Y.Bytes() + sigAX := pub.A.X.Bytes() + sigAY := pub.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], sigRX[:]) + copy(dataToHash[sizeFr:], sigRY[:]) + copy(dataToHash[2*sizeFr:], sigAX[:]) + copy(dataToHash[3*sizeFr:], sigAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + if _, err := hFunc.Write(dataToHash[:]); err != nil { + return false, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // lhs = cofactor*S*Base + var lhs twistededwards.PointAffine + var bCofactor, bs big.Int + curveParams.Cofactor.ToBigInt(&bCofactor) + bs.SetBytes(sig.S[:]) + lhs.ScalarMul(&curveParams.Base, &bs). + ScalarMul(&lhs, &bCofactor) + + if !lhs.IsOnCurve() { + return false, errNotOnCurve + } + + // rhs = cofactor*(R + H(R,A,M)*A) + var rhs twistededwards.PointAffine + rhs.ScalarMul(&pub.A, &hramInt). + Add(&rhs, &sig.R). 
+ ScalarMul(&rhs, &bCofactor) + if !rhs.IsOnCurve() { + return false, errNotOnCurve + } + + // verifies that cofactor*S*Base=cofactor*(R + H(R,A,M)*A) + if !lhs.X.Equal(&rhs.X) || !lhs.Y.Equal(&rhs.Y) { + return false, nil + } + + return true, nil +} diff --git a/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go b/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go new file mode 100644 index 000000000..7d284bee3 --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/sha256" + "math/rand" + "testing" + + crand "crypto/rand" + + "fmt" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/hash" + "github.com/consensys/gnark-crypto/signature" +) + +func Example() { + // instantiate hash function + hFunc := hash.MIMC_BW6_756.New("seed") + + // create a eddsa key pair + privateKey, _ := signature.EDDSA_BW6_756.New(crand.Reader) + publicKey := privateKey.Public() + + // note that the message is on 4 bytes + msg := []byte{0xde, 0xad, 0xf0, 0x0d} + + // sign the message + signature, _ := privateKey.Sign(msg, hFunc) + + // verifies signature + isValid, _ := publicKey.Verify(signature, msg, hFunc) + if !isValid { + fmt.Println("1. invalid signature") + } else { + fmt.Println("1. valid signature") + } + + // Output: 1. 
valid signature +} + +func TestSerialization(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + privKey1, err := signature.EDDSA_BW6_756.New(r) + if err != nil { + t.Fatal(err) + } + pubKey1 := privKey1.Public() + + privKey2, err := signature.EDDSA_BW6_756.New(r) + if err != nil { + t.Fatal(err) + } + pubKey2 := privKey2.Public() + + pubKeyBin1 := pubKey1.Bytes() + pubKey2.SetBytes(pubKeyBin1) + pubKeyBin2 := pubKey2.Bytes() + if len(pubKeyBin1) != len(pubKeyBin2) { + t.Fatal("Inconistent size") + } + for i := 0; i < len(pubKeyBin1); i++ { + if pubKeyBin1[i] != pubKeyBin2[i] { + t.Fatal("Error serialize(deserialize(.))") + } + } + + privKeyBin1 := privKey1.Bytes() + privKey2.SetBytes(privKeyBin1) + privKeyBin2 := privKey2.Bytes() + if len(privKeyBin1) != len(privKeyBin2) { + t.Fatal("Inconistent size") + } + for i := 0; i < len(privKeyBin1); i++ { + if privKeyBin1[i] != privKeyBin2[i] { + t.Fatal("Error serialize(deserialize(.))") + } + } +} + +func TestEddsaMIMC(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + // create eddsa obj and sign a message + privKey, err := signature.EDDSA_BW6_756.New(r) + if err != nil { + t.Fatal(nil) + } + pubKey := privKey.Public() + hFunc := hash.MIMC_BW6_756.New("seed") + + var frMsg fr.Element + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978") + msgBin := frMsg.Bytes() + signature, err := privKey.Sign(msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + + // verifies correct msg + res, err := pubKey.Verify(signature, msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + if !res { + t.Fatal("Verifiy correct signature should return true") + } + + // verifies wrong msg + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035979") + msgBin = frMsg.Bytes() + res, err = pubKey.Verify(signature, msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + if res { + t.Fatal("Verfiy wrong signature should be 
false") + } + +} + +func TestEddsaSHA256(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + hFunc := sha256.New() + + // create eddsa obj and sign a message + // create eddsa obj and sign a message + + privKey, err := signature.EDDSA_BW6_756.New(r) + pubKey := privKey.Public() + if err != nil { + t.Fatal(err) + } + + signature, err := privKey.Sign([]byte("message"), hFunc) + if err != nil { + t.Fatal(err) + } + + // verifies correct msg + res, err := pubKey.Verify(signature, []byte("message"), hFunc) + if err != nil { + t.Fatal(err) + } + if !res { + t.Fatal("Verifiy correct signature should return true") + } + + // verifies wrong msg + res, err = pubKey.Verify(signature, []byte("wrong_message"), hFunc) + if err != nil { + t.Fatal(err) + } + if res { + t.Fatal("Verfiy wrong signature should be false") + } + +} + +// benchmarks + +func BenchmarkVerify(b *testing.B) { + + src := rand.NewSource(0) + r := rand.New(src) + + hFunc := hash.MIMC_BW6_756.New("seed") + + // create eddsa obj and sign a message + privKey, err := signature.EDDSA_BW6_756.New(r) + pubKey := privKey.Public() + if err != nil { + b.Fatal(err) + } + var frMsg fr.Element + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978") + msgBin := frMsg.Bytes() + signature, _ := privKey.Sign(msgBin[:], hFunc) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + pubKey.Verify(signature, msgBin[:], hFunc) + } +} diff --git a/ecc/bw6-756/twistededwards/eddsa/marshal.go b/ecc/bw6-756/twistededwards/eddsa/marshal.go new file mode 100644 index 000000000..c68129087 --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/marshal.go @@ -0,0 +1,133 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "io" +) + +// Bytes returns the binary representation of the public key +// follows https://tools.ietf.org/html/rfc8032#section-3.1 +// and returns a compressed representation of the point (x,y) +// +// x, y are the coordinates of the point +// on the twisted Edwards as big endian integers. +// compressed representation store x with a parity bit to recompute y +func (pk *PublicKey) Bytes() []byte { + var res [sizePublicKey]byte + pkBin := pk.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pkBin[:]) + return res[:] +} + +// SetBytes sets p from binary representation in buf. +// buf represents a public key as x||y where x, y are +// interpreted as big endian binary numbers corresponding +// to the coordinates of a point on the twisted Edwards. +// It returns the number of bytes read from the buffer. +func (pk *PublicKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePublicKey { + return n, io.ErrShortBuffer + } + if _, err := pk.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !pk.A.IsOnCurve() { + return n, errNotOnCurve + } + return n, nil +} + +// Bytes returns the binary representation of pk, +// as byte array publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. 
+func (privKey *PrivateKey) Bytes() []byte { + var res [sizePrivateKey]byte + pubkBin := privKey.PublicKey.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pubkBin[:]) + subtle.ConstantTimeCopy(1, res[sizeFr:2*sizeFr], privKey.scalar[:]) + subtle.ConstantTimeCopy(1, res[2*sizeFr:], privKey.randSrc[:]) + return res[:] +} + +// SetBytes sets privKey from buf, where buf is interpreted +// as publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. +// It returns the number of bytes read. +func (privKey *PrivateKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePrivateKey { + return n, io.ErrShortBuffer + } + if _, err := privKey.PublicKey.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !privKey.PublicKey.A.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, privKey.scalar[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + subtle.ConstantTimeCopy(1, privKey.randSrc[:], buf[2*sizeFr:]) + n += sizeFr + return n, nil +} + +// Bytes returns the binary representation of sig +// as a byte array of size 3*sizeFr x||y||s where +// * x, y are the coordinates of a point on the twisted +// Edwards represented in big endian +// * s=r+h(r,a,m) mod l, the Hasse bound guarantees that +// s is smaller than sizeFr (in particular it is supposed +// s is NOT blinded) +func (sig *Signature) Bytes() []byte { + var res [sizeSignature]byte + sigRBin := sig.R.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], sigRBin[:]) + subtle.ConstantTimeCopy(1, res[sizeFr:], sig.S[:]) + return res[:] +} + +// SetBytes sets sig from a buffer in binary. +// buf is read and interpreted as x||y||s where +// * x,y are the coordinates of a point on the twisted +// Edwards represented in big endian +// * s=r+h(r,a,m) mod l, the Hasse bound guarantees that +// s is smaller than sizeFr (in particular it is supposed +// s is NOT blinded) +// It returns the number of bytes read from buf.
+func (sig *Signature) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizeSignature { + return n, io.ErrShortBuffer + } + if _, err := sig.R.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !sig.R.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, sig.S[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + return n, nil +} diff --git a/ecc/bw6-756/twistededwards/point.go b/ecc/bw6-756/twistededwards/point.go new file mode 100644 index 000000000..d6457b1a1 --- /dev/null +++ b/ecc/bw6-756/twistededwards/point.go @@ -0,0 +1,411 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "crypto/subtle" + "io" + "math/big" + "math/bits" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// PointAffine point on a twisted Edwards curve +type PointAffine struct { + X, Y fr.Element +} + +// PointProj point in projective coordinates +type PointProj struct { + X, Y, Z fr.Element +} + +const ( + //following https://tools.ietf.org/html/rfc8032#section-3.1, + // an fr element x is negative if its binary encoding is + // lexicographically larger than -x. 
+ mCompressedNegative = 0x80 + mCompressedPositive = 0x00 + mUnmask = 0x7f + + // size in byte of a compressed point (point.Y --> fr.Element) + sizePointCompressed = fr.Limbs * 8 +) + +// Bytes returns the compressed point as a byte array +// Follows https://tools.ietf.org/html/rfc8032#section-3.1, +// as the twisted Edwards implementation is primarily used +// for eddsa. +func (p *PointAffine) Bytes() [sizePointCompressed]byte { + + var res [sizePointCompressed]byte + var mask uint + + y := p.Y.Bytes() + + if p.X.LexicographicallyLargest() { + mask = mCompressedNegative + } else { + mask = mCompressedPositive + } + // p.Y must be in little endian + y[0] |= byte(mask) // msb of y + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + y[i], y[j] = y[j], y[i] + } + subtle.ConstantTimeCopy(1, res[:], y[:]) + return res +} + +// Marshal converts p to a byte slice +func (p *PointAffine) Marshal() []byte { + b := p.Bytes() + return b[:] +} + +func computeX(y *fr.Element) (x fr.Element) { + var one, num, den fr.Element + one.SetOne() + num.Square(y) + den.Mul(&num, &edwards.D) + num.Sub(&one, &num) + den.Sub(&edwards.A, &den) + x.Div(&num, &den) + x.Sqrt(&x) + return +} + +// SetBytes sets p from buf +// len(buf) >= sizePointCompressed +// buf contains the Y coordinate masked with a parity bit to recompute the X coordinate +// from the curve equation. See Bytes() and https://tools.ietf.org/html/rfc8032#section-3.1 +// Returns the number of read bytes and an error if the buffer is too short. 
+func (p *PointAffine) SetBytes(buf []byte) (int, error) { + + if len(buf) < sizePointCompressed { + return 0, io.ErrShortBuffer + } + bufCopy := make([]byte, sizePointCompressed) + subtle.ConstantTimeCopy(1, bufCopy, buf[:sizePointCompressed]) + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + bufCopy[i], bufCopy[j] = bufCopy[j], bufCopy[i] + } + isLexicographicallyLargest := (mCompressedNegative&bufCopy[0])>>7 == 1 + bufCopy[0] &= mUnmask + p.Y.SetBytes(bufCopy) + p.X = computeX(&p.Y) + if isLexicographicallyLargest { + if !p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } else { + if p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } + + return sizePointCompressed, nil +} + +// Unmarshal alias to SetBytes() +func (p *PointAffine) Unmarshal(b []byte) error { + _, err := p.SetBytes(b) + return err +} + +// Set sets p to p1 and return it +func (p *PointProj) Set(p1 *PointProj) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.Set(&p1.Z) + return p +} + +// Set sets p to p1 and return it +func (p *PointAffine) Set(p1 *PointAffine) *PointAffine { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + return p +} + +// Equal returns true if p=p1 false otherwise +func (p *PointAffine) Equal(p1 *PointAffine) bool { + return p.X.Equal(&p1.X) && p.Y.Equal(&p1.Y) +} + +// Equal returns true if p=p1 false otherwise +// If one point is on the affine chart Z=0 it returns false +func (p *PointProj) Equal(p1 *PointProj) bool { + if p.Z.IsZero() || p1.Z.IsZero() { + return false + } + var pAffine, p1Affine PointAffine + pAffine.FromProj(p) + p1Affine.FromProj(p1) + return pAffine.Equal(&p1Affine) +} + +// NewPointAffine creates a new instance of PointAffine +func NewPointAffine(x, y fr.Element) PointAffine { + return PointAffine{x, y} +} + +// IsOnCurve checks if a point is on the twisted Edwards curve +func (p *PointAffine) IsOnCurve() bool { + + ecurve := GetEdwardsCurve() + + var lhs, rhs, tmp fr.Element + + tmp.Mul(&p.Y, &p.Y) + lhs.Mul(&p.X, &p.X) + 
mulByA(&lhs) + lhs.Add(&lhs, &tmp) + + tmp.Mul(&p.X, &p.X). + Mul(&tmp, &p.Y). + Mul(&tmp, &p.Y). + Mul(&tmp, &ecurve.D) + rhs.SetOne().Add(&rhs, &tmp) + + return lhs.Equal(&rhs) +} + +// Add adds two points (x,y), (u,v) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Add(p1, p2 *PointAffine) *PointAffine { + + ecurve := GetEdwardsCurve() + + var xu, yv, xv, yu, dxyuv, one, denx, deny fr.Element + pRes := new(PointAffine) + xv.Mul(&p1.X, &p2.Y) + yu.Mul(&p1.Y, &p2.X) + pRes.X.Add(&xv, &yu) + + xu.Mul(&p1.X, &p2.X) + mulByA(&xu) + yv.Mul(&p1.Y, &p2.Y) + pRes.Y.Sub(&yv, &xu) + + dxyuv.Mul(&xv, &yu).Mul(&dxyuv, &ecurve.D) + one.SetOne() + denx.Add(&one, &dxyuv) + deny.Sub(&one, &dxyuv) + + p.X.Div(&pRes.X, &denx) + p.Y.Div(&pRes.Y, &deny) + + return p +} + +// Double doubles point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Double(p1 *PointAffine) *PointAffine { + + p.Set(p1) + var xx, yy, xy, denum, two fr.Element + + xx.Square(&p.X) + yy.Square(&p.Y) + xy.Mul(&p.X, &p.Y) + mulByA(&xx) + denum.Add(&xx, &yy) + + p.X.Double(&xy).Div(&p.X, &denum) + + two.SetOne().Double(&two) + denum.Neg(&denum).Add(&denum, &two) + + p.Y.Sub(&yy, &xx).Div(&p.Y, &denum) + + return p +} + +// Neg negates point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointProj) Neg(p1 *PointProj) *PointProj { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// FromProj sets p in affine from p in projective +func (p *PointAffine) FromProj(p1 *PointProj) *PointAffine { + p.X.Div(&p1.X, &p1.Z) + p.Y.Div(&p1.Y, &p1.Z) + return p +} + +// FromAffine sets p in projective from p in affine +func (p *PointProj) FromAffine(p1 *PointAffine) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.SetOne() + return p +} + +// Add adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-add-2008-bbjlp +func (p *PointProj) Add(p1, p2 *PointProj) 
*PointProj { + + var res PointProj + + ecurve := GetEdwardsCurve() + + var A, B, C, D, E, F, G, H, I fr.Element + A.Mul(&p1.Z, &p2.Z) + B.Square(&A) + C.Mul(&p1.X, &p2.X) + D.Mul(&p1.Y, &p2.Y) + E.Mul(&ecurve.D, &C).Mul(&E, &D) + F.Sub(&B, &E) + G.Add(&B, &E) + H.Add(&p1.X, &p1.Y) + I.Add(&p2.X, &p2.Y) + res.X.Mul(&H, &I). + Sub(&res.X, &C). + Sub(&res.X, &D). + Mul(&res.X, &A). + Mul(&res.X, &F) + mulByA(&C) + C.Neg(&C) + res.Y.Add(&D, &C). + Mul(&res.Y, &A). + Mul(&res.Y, &G) + res.Z.Mul(&F, &G) + + p.Set(&res) + return p +} + +// MixedAdd adds a point in projective to a point in affine coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-madd-2008-bbjlp +func (p *PointProj) MixedAdd(p1 *PointProj, p2 *PointAffine) *PointProj { + + var res PointProj + + ecurve := GetEdwardsCurve() + + var B, C, D, E, F, G, H, I fr.Element + B.Square(&p1.Z) + C.Mul(&p1.X, &p2.X) + D.Mul(&p1.Y, &p2.Y) + E.Mul(&ecurve.D, &C).Mul(&E, &D) + F.Sub(&B, &E) + G.Add(&B, &E) + H.Add(&p1.X, &p1.Y) + I.Add(&p2.X, &p2.Y) + res.X.Mul(&H, &I). + Sub(&res.X, &C). + Sub(&res.X, &D). + Mul(&res.X, &p1.Z). + Mul(&res.X, &F) + mulByA(&C) + res.Y.Sub(&D, &C). + Mul(&res.Y, &p1.Z). + Mul(&res.Y, &G) + res.Z.Mul(&F, &G) + + p.Set(&res) + return p +} + +// Double adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#doubling-dbl-2008-bbjlp +func (p *PointProj) Double(p1 *PointProj) *PointProj { + + var res PointProj + + var B, C, D, E, F, H, J fr.Element + + B.Add(&p1.X, &p1.Y).Square(&B) + C.Square(&p1.X) + D.Square(&p1.Y) + E.Set(&C) + mulByA(&E) + F.Add(&E, &D) + H.Square(&p1.Z) + J.Sub(&F, &H).Sub(&J, &H) + res.X.Sub(&B, &C). + Sub(&res.X, &D). 
+ Mul(&res.X, &J) + res.Y.Sub(&E, &D).Mul(&res.Y, &F) + res.Z.Mul(&F, &J) + + p.Set(&res) + return p +} + +// Neg sets p to -p1 and returns it +func (p *PointAffine) Neg(p1 *PointAffine) *PointAffine { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// setInfinity sets p to O (0:1:1) +func (p *PointProj) setInfinity() *PointProj { + p.X.SetZero() + p.Y.SetOne() + p.Z.SetOne() + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in projective coordinates with a scalar in big.Int +func (p *PointProj) ScalarMul(p1 *PointProj, scalar *big.Int) *PointProj { + + var _scalar big.Int + _scalar.Set(scalar) + p.Set(p1) + if _scalar.Sign() == -1 { + _scalar.Neg(&_scalar) + p.Neg(p) + } + var resProj PointProj + resProj.setInfinity() + const wordSize = bits.UintSize + sWords := _scalar.Bits() + + for i := len(sWords) - 1; i >= 0; i-- { + ithWord := sWords[i] + for k := 0; k < wordSize; k++ { + resProj.Double(&resProj) + kthBit := (ithWord >> (wordSize - 1 - k)) & 1 + if kthBit == 1 { + resProj.Add(&resProj, p) + } + } + } + + p.Set(&resProj) + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in affine coordinates with a scalar in big.Int +func (p *PointAffine) ScalarMul(p1 *PointAffine, scalar *big.Int) *PointAffine { + + var p1Proj, resProj PointProj + p1Proj.FromAffine(p1) + resProj.ScalarMul(&p1Proj, scalar) + p.FromProj(&resProj) + + return p +} diff --git a/ecc/bw6-756/twistededwards/twistededwards.go b/ecc/bw6-756/twistededwards/twistededwards.go new file mode 100644 index 000000000..152ac7c20 --- /dev/null +++ b/ecc/bw6-756/twistededwards/twistededwards.go @@ -0,0 +1,63 @@ +/* +Copyright © 2020 ConsenSys + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package twistededwards + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 +type CurveParams struct { + A, D fr.Element // in Montgomery form + Cofactor fr.Element // not in Montgomery form + Order big.Int + Base PointAffine +} + +var edwards CurveParams + +// GetEdwardsCurve returns the twisted Edwards curve on BW6-756's Fr +func GetEdwardsCurve() CurveParams { + // copy to keep Order private + var res CurveParams + + res.A.Set(&edwards.A) + res.D.Set(&edwards.D) + res.Cofactor.Set(&edwards.Cofactor) + res.Order.Set(&edwards.Order) + res.Base.Set(&edwards.Base) + + return res +} + +func init() { + + edwards.A.SetUint64(143580) + edwards.D.SetUint64(143576) + edwards.Cofactor.SetUint64(8).FromMont() + edwards.Order.SetString("75656025759413271466656060197725120092480961471365614219134998880569790930794516726065877484428941069706901665493", 10) + + edwards.Base.X.SetString("178620376715698421301710631119120785579284871526578026139185646772672252736182448135689014711987732666078420387915") + edwards.Base.Y.SetString("279345325880910540799960837653138904956852780817349960193932651092957355032339063742900216468694143617372745972501") +} + +// mulByA multiplies fr.Element by edwards.A +func mulByA(x *fr.Element) { + x.Mul(x, &edwards.A) +} diff --git a/ecc/bw6-756/twistededwards/twistededwards_test.go b/ecc/bw6-756/twistededwards/twistededwards_test.go new file mode 100644 index 000000000..b0398d2d5 --- /dev/null +++ b/ecc/bw6-756/twistededwards/twistededwards_test.go @@ -0,0 
+1,456 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "math/big" + "math/rand" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + // affine + properties.Property("Equal affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1 PointAffine + p1.Set(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(¶ms.Base) + }, + )) + + properties.Property("Add affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + p3.Set(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.Set(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := 
GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Neg(&p1) + p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + var s big.Int + s.SetUint64(10) + + p2.ScalarMul(&p1, &s) + p1.ScalarMul(&p1, &s) + + return p2.Equal(&p1) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + // proj + properties.Property("Equal projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1, baseProj PointProj + p1.FromAffine(¶ms.Base) + baseProj.FromAffine(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(&baseProj) + }, + )) + + properties.Property("Add projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + p3.FromAffine(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.FromAffine(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg projective: having the 
receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Neg(&p1) + p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestField(t *testing.T) { + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS := GenBigInt() + + properties.Property("MulByA(x) should match Mul(x, curve.A)", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var z1, z2 fr.Element + z1.SetBigInt(&s) + z2.Mul(&z1, ¶ms.A) + mulByA(&z1) + + return z1.Equal(&z2) + }, + genS, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS1 := GenBigInt() + genS2 := GenBigInt() + + // affine + properties.Property("(affine) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + p2.Neg(&p1) + + p1.Add(&p1, &p2) + + var one fr.Element + one.SetOne() + + return p1.IsOnCurve() && p1.X.IsZero() && p1.Y.Equal(&one) + }, + genS1, + )) + + properties.Property("(affine) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + p1.ScalarMul(¶ms.Base, &s) + p2.ScalarMul(¶ms.Base, &s) + + p1.Add(&p1, &p2) + p2.Double(&p2) + + return p1.IsOnCurve() && p1.Equal(&p2) && !p1.Equal(&inf) + }, + genS1, + )) + + properties.Property("(affine) [a]P+[b]P = [a+b]P", prop.ForAll( + func(s1, s2 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, p3, inf PointAffine + inf.X.SetZero() + inf.Y.SetZero() + p1.ScalarMul(¶ms.Base, &s1) + p2.ScalarMul(¶ms.Base, &s2) + p3.Set(¶ms.Base) + + 
p2.Add(&p1, &p2) + + s1.Add(&s1, &s2) + p3.ScalarMul(¶ms.Base, &s1) + + return p2.IsOnCurve() && p3.Equal(&p2) && !p3.Equal(&inf) + }, + genS1, + genS2, + )) + + properties.Property("(affine) [a]P+[-a]P = O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + inf.X.SetZero() + inf.Y.SetOne() + p1.ScalarMul(¶ms.Base, &s1) + s1.Neg(&s1) + p2.ScalarMul(¶ms.Base, &s1) + + p2.Add(&p1, &p2) + + return p2.IsOnCurve() && p2.Equal(&inf) + }, + genS1, + )) + + properties.Property("[5]P=[2][2]P+P", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + + five := big.NewInt(5) + p2.Double(&p1).Double(&p2).Add(&p2, &p1) + p1.ScalarMul(&p1, five) + + return p2.IsOnCurve() && p2.Equal(&p1) + }, + genS1, + )) + + // proj + properties.Property("(projective) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p1.ScalarMul(&baseProj, &s1) + p2.Neg(&p1) + + p.Add(&p1, &p2) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(projective) P+P=2*P", prop.ForAll( + + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p.ScalarMul(&baseProj, &s) + + p1.Add(&p, &p) + p2.Double(&p) + + return p1.Equal(&p2) + }, + genS1, + )) + + // mixed + properties.Property("(mixed) P+(-P)=O", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, pProj, p PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + pAffine.Neg(&pAffine) + + p.MixedAdd(&pProj, &pAffine) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(mixed) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, 
pProj, p, p2 PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + + p.MixedAdd(&pProj, &pAffine) + p2.Double(&pProj) + + return p.Equal(&p2) + }, + genS1, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestMarshal(t *testing.T) { + + var point, unmarshalPoint PointAffine + point.Set(&edwards.Base) + for i := 0; i < 20; i++ { + b := point.Marshal() + unmarshalPoint.Unmarshal(b) + if !point.Equal(&unmarshalPoint) { + t.Fatal("error unmarshal(marshal(point))") + } + point.Add(&point, &edwards.Base) + } +} + +// GenBigInt generates a big.Int +// TODO @thomas we use fr size as max bound here +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkScalarMul(b *testing.B) { + params := GetEdwardsCurve() + var a PointProj + var s big.Int + a.FromAffine(¶ms.Base) + s.SetString("52435875175126190479447705081859658376581184513", 10) + s.Add(&s, ¶ms.Order) + + var doubleAndAdd PointProj + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } + }) +} diff --git a/ecc/ecc.go b/ecc/ecc.go index 3615cf261..2b6d2378e 100644 --- a/ecc/ecc.go +++ b/ecc/ecc.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315, bw6-633, BLS12-378 and BW6-756 elliptic curves implementation (+pairing). 
+// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315, bw6-633, bls12-378 and bw6-756 elliptic curves implementation (+pairing). // // Also // diff --git a/hash/hashes.go b/hash/hashes.go index 01b9b86ae..494b4d1d9 100644 --- a/hash/hashes.go +++ b/hash/hashes.go @@ -26,6 +26,7 @@ import ( bls315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/mimc" bn254 "github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc" bw633 "github.com/consensys/gnark-crypto/ecc/bw6-633/fr/mimc" + bw756 "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/mimc" bw761 "github.com/consensys/gnark-crypto/ecc/bw6-761/fr/mimc" ) @@ -39,6 +40,7 @@ const ( MIMC_BW6_761 MIMC_BLS24_315 MIMC_BW6_633 + MIMC_BW6_756 ) // size of digests in bytes @@ -50,6 +52,7 @@ var digestSize = []uint8{ MIMC_BW6_761: 96, MIMC_BLS24_315: 48, MIMC_BW6_633: 80, + MIMC_BW6_756: 96, } // New creates the corresponding mimc hash function. @@ -69,6 +72,8 @@ func (m Hash) New(seed string) hash.Hash { return bls315.NewMiMC(seed) case MIMC_BW6_633: return bw633.NewMiMC(seed) + case MIMC_BW6_756: + return bw756.NewMiMC(seed) default: panic("Unknown mimc ID") } @@ -91,6 +96,8 @@ func (m Hash) String() string { return "MIMC_BLS315" case MIMC_BW6_633: return "MIMC_BW633" + case MIMC_BW6_756: + return "MIMC_BW756" default: panic("Unknown mimc ID") } diff --git a/internal/generator/crypto/signature/eddsa/generate.go b/internal/generator/crypto/signature/eddsa/generate.go index dd4f82519..1422a4137 100644 --- a/internal/generator/crypto/signature/eddsa/generate.go +++ b/internal/generator/crypto/signature/eddsa/generate.go @@ -8,10 +8,6 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - if conf.Equal(config.BW6_756) { - return nil - } - // eddsa conf.Package = "eddsa" entries := []bavard.Entry{ diff --git a/internal/generator/edwards/generate.go b/internal/generator/edwards/generate.go index b2ea5b5e1..45f6bf1bd 100644 --- 
a/internal/generator/edwards/generate.go +++ b/internal/generator/edwards/generate.go @@ -8,10 +8,6 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - if conf.Equal(config.BW6_756) { - return nil - } - conf.Package = "twistededwards" entries := []bavard.Entry{ diff --git a/signature/signature.go b/signature/signature.go index f736ea653..d71cda2e7 100644 --- a/signature/signature.go +++ b/signature/signature.go @@ -75,7 +75,7 @@ type Signer interface { type SignatureScheme uint -const maxSignatures = 7 +const maxSignatures = 8 const ( EDDSA_BN254 SignatureScheme = iota @@ -85,6 +85,7 @@ const ( EDDSA_BW6_761 EDDSA_BLS24_315 EDDSA_BW6_633 + EDDSA_BW6_756 ) var signatures = make([]func(io.Reader) (Signer, error), maxSignatures) From 263e862a80e0375ab94e69aa139c3ca09a88157f Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 31 Dec 2021 19:18:22 +0100 Subject: [PATCH 09/29] perf(bls12-378/tEd): smallest A coeff --- ecc/bls12-378/twistededwards/twistededwards.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecc/bls12-378/twistededwards/twistededwards.go b/ecc/bls12-378/twistededwards/twistededwards.go index e2f98390c..676838f24 100644 --- a/ecc/bls12-378/twistededwards/twistededwards.go +++ b/ecc/bls12-378/twistededwards/twistededwards.go @@ -49,12 +49,12 @@ func GetEdwardsCurve() CurveParams { func init() { - edwards.A.SetString("1169928") - edwards.D.SetString("1169924") + edwards.A.SetString("16249") + edwards.D.SetString("826857503717340716663906603396009292766308904506333520048618402505612607353") edwards.Cofactor.SetUint64(8).FromMont() edwards.Order.SetString("1860429383364016612493789857641020908721690454530426945748883177201355593303", 10) - edwards.Base.X.SetString("4274983589151226901853657690021194631121133716096168671136076068148698830183") + edwards.Base.X.SetString("6772953896463446981848394912418300623023000177913479948380771331313783560843") 
edwards.Base.Y.SetString("9922290044608088599966879240752111513195706854076002240583420830067351093249") } From 5f217792bb3b24caae1dfdc69f71717be35d8339 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 28 Dec 2021 11:18:28 +0100 Subject: [PATCH 10/29] feat: add bw6-756 (2-chain w/ bls12-378 GT-strong) --- ecc/bw6-756/bw6-756.go | 120 + ecc/bw6-756/doc.go | 18 + ecc/bw6-756/fp/arith.go | 60 + ecc/bw6-756/fp/asm.go | 24 + ecc/bw6-756/fp/asm_noadx.go | 25 + ecc/bw6-756/fp/bw6_utils.go | 27 + ecc/bw6-756/fp/doc.go | 43 + ecc/bw6-756/fp/element.go | 2722 ++++++++++++++++ ecc/bw6-756/fp/element_exp.go | 1993 ++++++++++++ ecc/bw6-756/fp/element_fuzz.go | 200 ++ ecc/bw6-756/fp/element_mul_adx_amd64.s | 2739 ++++++++++++++++ ecc/bw6-756/fp/element_mul_amd64.s | 2759 ++++++++++++++++ ecc/bw6-756/fp/element_ops_amd64.go | 50 + ecc/bw6-756/fp/element_ops_amd64.s | 746 +++++ ecc/bw6-756/fp/element_ops_noasm.go | 78 + ecc/bw6-756/fp/element_test.go | 2777 +++++++++++++++++ ecc/bw6-756/fr/arith.go | 60 + ecc/bw6-756/fr/asm.go | 24 + ecc/bw6-756/fr/asm_noadx.go | 25 + ecc/bw6-756/fr/doc.go | 43 + ecc/bw6-756/fr/element.go | 1720 ++++++++++ ecc/bw6-756/fr/element_exp.go | 1040 ++++++ ecc/bw6-756/fr/element_fuzz.go | 152 + ecc/bw6-756/fr/element_mul_adx_amd64.s | 836 +++++ ecc/bw6-756/fr/element_mul_amd64.s | 858 +++++ ecc/bw6-756/fr/element_ops_amd64.go | 50 + ecc/bw6-756/fr/element_ops_amd64.s | 452 +++ ecc/bw6-756/fr/element_ops_noasm.go | 78 + ecc/bw6-756/fr/element_test.go | 2681 ++++++++++++++++ ecc/bw6-756/fr/fft/doc.go | 18 + ecc/bw6-756/fr/fft/domain.go | 300 ++ ecc/bw6-756/fr/fft/domain_test.go | 47 + ecc/bw6-756/fr/fft/fft.go | 319 ++ ecc/bw6-756/fr/fft/fft_test.go | 415 +++ ecc/bw6-756/fr/fft/fuzz.go | 74 + ecc/bw6-756/fr/fft/fuzz_test.go | 56 + ecc/bw6-756/fr/kzg/doc.go | 18 + ecc/bw6-756/fr/kzg/fuzz.go | 84 + ecc/bw6-756/fr/kzg/fuzz_test.go | 56 + ecc/bw6-756/fr/kzg/kzg.go | 518 +++ ecc/bw6-756/fr/kzg/kzg_test.go | 453 +++ ecc/bw6-756/fr/kzg/marshal.go | 138 
+ ecc/bw6-756/fr/mimc/doc.go | 18 + ecc/bw6-756/fr/mimc/fuzz.go | 34 + ecc/bw6-756/fr/mimc/mimc.go | 174 ++ ecc/bw6-756/fr/permutation/doc.go | 18 + ecc/bw6-756/fr/permutation/permutation.go | 361 +++ .../fr/permutation/permutation_test.go | 94 + ecc/bw6-756/fr/plookup/doc.go | 18 + ecc/bw6-756/fr/plookup/plookup_test.go | 139 + ecc/bw6-756/fr/plookup/table.go | 252 ++ ecc/bw6-756/fr/plookup/vector.go | 687 ++++ ecc/bw6-756/fr/polynomial/doc.go | 18 + ecc/bw6-756/fr/polynomial/polynomial.go | 123 + ecc/bw6-756/fr/polynomial/polynomial_test.go | 208 ++ ecc/bw6-756/fuzz.go | 76 + ecc/bw6-756/fuzz_test.go | 56 + ecc/bw6-756/g1.go | 1081 +++++++ ecc/bw6-756/g1_test.go | 664 ++++ ecc/bw6-756/g2.go | 933 ++++++ ecc/bw6-756/g2_test.go | 664 ++++ ecc/bw6-756/hash_to_curve.go | 262 ++ ecc/bw6-756/internal/fptower/e3.go | 299 ++ ecc/bw6-756/internal/fptower/e3_test.go | 330 ++ ecc/bw6-756/internal/fptower/e6.go | 412 +++ ecc/bw6-756/internal/fptower/e6_pairing.go | 127 + ecc/bw6-756/internal/fptower/e6_test.go | 387 +++ ecc/bw6-756/internal/fptower/frobenius.go | 102 + .../internal/fptower/generators_test.go | 43 + ecc/bw6-756/marshal.go | 1155 +++++++ ecc/bw6-756/marshal_test.go | 457 +++ ecc/bw6-756/multiexp.go | 983 ++++++ ecc/bw6-756/multiexp_test.go | 701 +++++ ecc/bw6-756/pairing.go | 366 +++ ecc/bw6-756/pairing_test.go | 306 ++ ecc/ecc.go | 9 +- ecc/ecc.md | 2 + ...60554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 | Bin 0 -> 6633 bytes ...f43429276019e3f31fc34200000000000000000000 | Bin 0 -> 7376 bytes internal/generator/config/bw6-756.go | 28 + internal/generator/tower/generate.go | 2 +- 81 files changed, 36482 insertions(+), 3 deletions(-) create mode 100644 ecc/bw6-756/bw6-756.go create mode 100644 ecc/bw6-756/doc.go create mode 100644 ecc/bw6-756/fp/arith.go create mode 100644 ecc/bw6-756/fp/asm.go create mode 100644 ecc/bw6-756/fp/asm_noadx.go create mode 100644 ecc/bw6-756/fp/bw6_utils.go create mode 100644 ecc/bw6-756/fp/doc.go create mode 100644 
ecc/bw6-756/fp/element.go create mode 100644 ecc/bw6-756/fp/element_exp.go create mode 100644 ecc/bw6-756/fp/element_fuzz.go create mode 100644 ecc/bw6-756/fp/element_mul_adx_amd64.s create mode 100644 ecc/bw6-756/fp/element_mul_amd64.s create mode 100644 ecc/bw6-756/fp/element_ops_amd64.go create mode 100644 ecc/bw6-756/fp/element_ops_amd64.s create mode 100644 ecc/bw6-756/fp/element_ops_noasm.go create mode 100644 ecc/bw6-756/fp/element_test.go create mode 100644 ecc/bw6-756/fr/arith.go create mode 100644 ecc/bw6-756/fr/asm.go create mode 100644 ecc/bw6-756/fr/asm_noadx.go create mode 100644 ecc/bw6-756/fr/doc.go create mode 100644 ecc/bw6-756/fr/element.go create mode 100644 ecc/bw6-756/fr/element_exp.go create mode 100644 ecc/bw6-756/fr/element_fuzz.go create mode 100644 ecc/bw6-756/fr/element_mul_adx_amd64.s create mode 100644 ecc/bw6-756/fr/element_mul_amd64.s create mode 100644 ecc/bw6-756/fr/element_ops_amd64.go create mode 100644 ecc/bw6-756/fr/element_ops_amd64.s create mode 100644 ecc/bw6-756/fr/element_ops_noasm.go create mode 100644 ecc/bw6-756/fr/element_test.go create mode 100644 ecc/bw6-756/fr/fft/doc.go create mode 100644 ecc/bw6-756/fr/fft/domain.go create mode 100644 ecc/bw6-756/fr/fft/domain_test.go create mode 100644 ecc/bw6-756/fr/fft/fft.go create mode 100644 ecc/bw6-756/fr/fft/fft_test.go create mode 100644 ecc/bw6-756/fr/fft/fuzz.go create mode 100644 ecc/bw6-756/fr/fft/fuzz_test.go create mode 100644 ecc/bw6-756/fr/kzg/doc.go create mode 100644 ecc/bw6-756/fr/kzg/fuzz.go create mode 100644 ecc/bw6-756/fr/kzg/fuzz_test.go create mode 100644 ecc/bw6-756/fr/kzg/kzg.go create mode 100644 ecc/bw6-756/fr/kzg/kzg_test.go create mode 100644 ecc/bw6-756/fr/kzg/marshal.go create mode 100644 ecc/bw6-756/fr/mimc/doc.go create mode 100644 ecc/bw6-756/fr/mimc/fuzz.go create mode 100644 ecc/bw6-756/fr/mimc/mimc.go create mode 100644 ecc/bw6-756/fr/permutation/doc.go create mode 100644 ecc/bw6-756/fr/permutation/permutation.go create mode 100644 
ecc/bw6-756/fr/permutation/permutation_test.go create mode 100644 ecc/bw6-756/fr/plookup/doc.go create mode 100644 ecc/bw6-756/fr/plookup/plookup_test.go create mode 100644 ecc/bw6-756/fr/plookup/table.go create mode 100644 ecc/bw6-756/fr/plookup/vector.go create mode 100644 ecc/bw6-756/fr/polynomial/doc.go create mode 100644 ecc/bw6-756/fr/polynomial/polynomial.go create mode 100644 ecc/bw6-756/fr/polynomial/polynomial_test.go create mode 100644 ecc/bw6-756/fuzz.go create mode 100644 ecc/bw6-756/fuzz_test.go create mode 100644 ecc/bw6-756/g1.go create mode 100644 ecc/bw6-756/g1_test.go create mode 100644 ecc/bw6-756/g2.go create mode 100644 ecc/bw6-756/g2_test.go create mode 100644 ecc/bw6-756/hash_to_curve.go create mode 100644 ecc/bw6-756/internal/fptower/e3.go create mode 100644 ecc/bw6-756/internal/fptower/e3_test.go create mode 100644 ecc/bw6-756/internal/fptower/e6.go create mode 100644 ecc/bw6-756/internal/fptower/e6_pairing.go create mode 100644 ecc/bw6-756/internal/fptower/e6_test.go create mode 100644 ecc/bw6-756/internal/fptower/frobenius.go create mode 100644 ecc/bw6-756/internal/fptower/generators_test.go create mode 100644 ecc/bw6-756/marshal.go create mode 100644 ecc/bw6-756/marshal_test.go create mode 100644 ecc/bw6-756/multiexp.go create mode 100644 ecc/bw6-756/multiexp_test.go create mode 100644 ecc/bw6-756/pairing.go create mode 100644 ecc/bw6-756/pairing_test.go create mode 100644 internal/generator/addchain/1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 create mode 100644 internal/generator/addchain/7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 create mode 100644 internal/generator/config/bw6-756.go diff --git a/ecc/bw6-756/bw6-756.go b/ecc/bw6-756/bw6-756.go new file mode 100644 
index 000000000..7b0d98fee --- /dev/null +++ b/ecc/bw6-756/bw6-756.go @@ -0,0 +1,120 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bw6756 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// E: y**2=x**3+1 +// Etwist: y**2 = x**3+33 +// Tower: Fp->Fp6, u**6=33 +// Generator (same as BLS378): x=11045256207009841153 +// optimal Ate loops: x+1, x**2-x-1 +// Fp: p=366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 +// Fr: r=605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 + +// ID BW6_756 ID +const ID = ecc.BW6_756 + +// bCurveCoeff b coeff of the curve +var bCurveCoeff fp.Element + +// bTwistCurveCoeff b coeff of the twist (defined over Fp) curve +var bTwistCurveCoeff fp.Element + +// generators of the r-torsion group, resp. in ker(pi-id), ker(Tr) +var g1Gen G1Jac +var g2Gen G2Jac + +var g1GenAff G1Affine +var g2GenAff G2Affine + +// point at infinity +var g1Infinity G1Jac +var g2Infinity G2Jac + +// optimal Ate loop counters +var loopCounter0 [191]int8 +var loopCounter1 [191]int8 + +// Parameters useful for the GLV scalar multiplication. 
The third roots define the +// endomorphisms phi1 and phi2 for and . lambda is such that lies above +// in the ring Z[phi]. More concretely it's the associated eigenvalue +// of phi1 (resp phi2) restricted to (resp ) +// cf https://www.cosic.esat.kuleuven.be/nessie/reports/phase2/GLV.pdf +var thirdRootOneG1 fp.Element +var thirdRootOneG2 fp.Element +var lambdaGLV big.Int + +// glvBasis stores R-linearly independant vectors (a,b), (c,d) +// in ker((u,v)->u+vlambda[r]), and their determinant +var glvBasis ecc.Lattice + +// generator of the curve +var xGen big.Int + +func init() { + + bCurveCoeff.SetOne() + bTwistCurveCoeff.MulByNonResidue(&bCurveCoeff) + + // E(3,y) * cofactor + g1Gen.X.SetString("286035407532233812057489253822435660910062665263942803649298092690795938518721117964189338863504082781482751182899097859005716378386344565362972291164604792882058761734674709131229927253172681714645554597102571818586966737895501") + g1Gen.Y.SetString("250540671634276190125882738767359258920233951524378923555904955920886135268516617166458911260101792169356480449980342047600821278990712908224386045486820019065641642853528653616206514851361917670279865872746658429844440125628329") + g1Gen.Z.SetString("1") + + // E(1,y) * cofactor + g2Gen.X.SetString("270164867145533700243149075881223225204067215320977230235816769808318087164726583740674261721395147407122688542569094772405350936550575160051166652281373572919753182191250641388443572739372443497834910784618354592418817138212395") + g2Gen.Y.SetString("296695446824796322573519291690935001172593568823998954880196613542512471119971074118215403545906873458039024520146929054366200365532511334310660691775675887531695313103875249166779149013653038059140912965769351316868363001510735") + g2Gen.Z.SetString("1") + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + // xGen+1 + loopCounter0 = [191]int8{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 
0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, -1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + + // xGen^3-xGen^2-xGen + T, _ := new(big.Int).SetString("1347495683935914696108087318582641220368021451587784278015", 10) + ecc.NafDecomposition(T, loopCounter1[:]) + + g1Infinity.X.SetOne() + g1Infinity.Y.SetOne() + g2Infinity.X.SetOne() + g2Infinity.Y.SetOne() + + thirdRootOneG2.SetString("99497571833115712246976573293861816254377473715694998268521440373748988342600853091641405554217584221455319677515385376103078837731420131015700054219263015095146628991433981753068027965212839748934246550470657") + thirdRootOneG1.Square(&thirdRootOneG2) + lambdaGLV.SetString("164391353554439166353793911729193406645071739502673898176639736370075683438438023898983435337729", 10) // (x**5-3*x**4+3*x**3-x+1) + _r := fr.Modulus() + ecc.PrecomputeLattice(_r, &lambdaGLV, &glvBasis) + + xGen.SetString("11045256207009841153", 10) + +} + +// Generators return the generators of the r-torsion group, resp. in ker(pi-id), ker(Tr) +func Generators() (g1Jac G1Jac, g2Jac G2Jac, g1Aff G1Affine, g2Aff G2Affine) { + g1Aff = g1GenAff + g2Aff = g2GenAff + g1Jac = g1Gen + g2Jac = g2Gen + return +} diff --git a/ecc/bw6-756/doc.go b/ecc/bw6-756/doc.go new file mode 100644 index 000000000..6b1eaf6dd --- /dev/null +++ b/ecc/bw6-756/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package bw6756 efficient elliptic curve and pairing implementation for bw6-756. +package bw6756 diff --git a/ecc/bw6-756/fp/arith.go b/ecc/bw6-756/fp/arith.go new file mode 100644 index 000000000..66fa66748 --- /dev/null +++ b/ecc/bw6-756/fp/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "math/bits" +) + +// madd0 hi = a*b + c (discards lo bits) +func madd0(a, b, c uint64) (hi uint64) { + var carry, lo uint64 + hi, lo = bits.Mul64(a, b) + _, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd1 hi, lo = a*b + c +func madd1(a, b, c uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd2 hi, lo = a*b + c + d +func madd2(a, b, c, d uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, e, carry) + return +} diff --git a/ecc/bw6-756/fp/asm.go b/ecc/bw6-756/fp/asm.go new file mode 100644 index 000000000..7344271eb --- /dev/null +++ b/ecc/bw6-756/fp/asm.go @@ -0,0 +1,24 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bw6-756/fp/asm_noadx.go b/ecc/bw6-756/fp/asm_noadx.go new file mode 100644 index 000000000..ae778bd3a --- /dev/null +++ b/ecc/bw6-756/fp/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bw6-756/fp/bw6_utils.go b/ecc/bw6-756/fp/bw6_utils.go new file mode 100644 index 000000000..58a2d0d10 --- /dev/null +++ b/ecc/bw6-756/fp/bw6_utils.go @@ -0,0 +1,27 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package fp + +// MulByNonResidue multiplies a fp.Element by 33 +func (z *Element) MulByNonResidue(x *Element) *Element { + var t Element + t.Double(x). + Double(&t). + Double(&t). + Double(&t). + Double(&t) + z.Add(&t, x) + return z +} diff --git a/ecc/bw6-756/fp/doc.go b/ecc/bw6-756/fp/doc.go new file mode 100644 index 000000000..033a5b2c8 --- /dev/null +++ b/ecc/bw6-756/fp/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fp contains field arithmetic operations for modulus = 0xf76adb...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. 
+// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [12]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f868400000000000000000001 // base 16 +// 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 // base 10 +package fp diff --git a/ecc/bw6-756/fp/element.go b/ecc/bw6-756/fp/element.go new file mode 100644 index 000000000..606e5f9dd --- /dev/null +++ b/ecc/bw6-756/fp/element.go @@ -0,0 +1,2722 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. 
In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 12 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 +type Element [12]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 12 + +// Bits number bits needed to represent Element +const Bits = 756 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 1 +const qElementWord1 uint64 = 3731203976813871104 +const qElementWord2 uint64 = 15039355238879481536 +const qElementWord3 uint64 = 4828608925799409630 +const qElementWord4 uint64 = 16326337093237622437 +const qElementWord5 uint64 = 756237273905161798 +const qElementWord6 uint64 = 16934317532427647658 +const qElementWord7 uint64 = 14755673041361585881 +const qElementWord8 uint64 = 18154628166362162086 +const qElementWord9 uint64 = 6671956210750770825 +const qElementWord10 uint64 = 16333450281447942351 +const qElementWord11 uint64 = 4352613195430282 + +var qElement = Element{ + qElementWord0, + 
qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + qElementWord6, + qElementWord7, + qElementWord8, + qElementWord9, + qElementWord10, + qElementWord11, +} + +// Used for Montgomery reduction. (qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 18446744073709551615 + +// rSquare +var rSquare = Element{ + 11214533042317621956, + 4418601975293183768, + 2233550636059863627, + 13772400071271951950, + 13010224617750716256, + 15582310590478290871, + 6301429202206019695, + 15624904615961126890, + 14411832617204527559, + 10495912060283172777, + 8432856701560321958, + 4166778949326216, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) 
+func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + z[4] = x[4] + z[5] = x[5] + z[6] = x[6] + z[7] = x[7] + z[8] = x[8] + z[9] = x[9] + z[10] = x[10] + z[11] = x[11] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) + } 
+} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + z[4] = 0 + z[5] = 0 + z[6] = 0 + z[7] = 0 + z[8] = 0 + z[9] = 0 + z[10] = 0 + z[11] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 18446744073709547378 + z[1] = 14463961505609547775 + z[2] = 15160016368967634470 + z[3] = 12241294279704278364 + z[4] = 2720419343484222500 + z[5] = 4799902015386277509 + z[6] = 8643488375494563078 + z[7] = 18366804658688562287 + z[8] = 2055362399696866477 + z[9] = 3108243834975866807 + z[10] = 9468215855567529777 + z[11] = 369351476012747 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 12 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[11] == x[11]) && (z[10] == x[10]) && (z[9] == x[9]) && (z[8] == x[8]) && (z[7] == x[7]) && (z[6] == x[6]) && (z[5] == x[5]) && (z[4] == x[4]) && (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[11] > _x[11] { + return 1 + } else if _z[11] < _x[11] { + return -1 + } + if _z[10] > _x[10] { + return 1 + } else if _z[10] < _x[10] { + return -1 + } + if _z[9] > _x[9] { + return 1 + } else if _z[9] < _x[9] { + return -1 + } + if _z[8] > _x[8] { + return 1 + } else if _z[8] < _x[8] { + return -1 + } + if _z[7] > _x[7] { + return 1 + } else if _z[7] < _x[7] { + return -1 + } + if _z[6] > _x[6] { + return 1 + } else if _z[6] < _x[6] { + return -1 + } + if _z[5] > _x[5] { + return 1 + } else if _z[5] < _x[5] { + return -1 + } + if _z[4] > _x[4] { + return 1 + } else if _z[4] < _x[4] { + return -1 + } + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 1, 0) + _, b = bits.Sub64(_z[1], 1865601988406935552, b) + _, b = bits.Sub64(_z[2], 7519677619439740768, b) + _, b = bits.Sub64(_z[3], 11637676499754480623, b) + _, b = bits.Sub64(_z[4], 8163168546618811218, b) + _, b = bits.Sub64(_z[5], 378118636952580899, b) + _, b = 
bits.Sub64(_z[6], 17690530803068599637, b) + _, b = bits.Sub64(_z[7], 7377836520680792940, b) + _, b = bits.Sub64(_z[8], 18300686120035856851, b) + _, b = bits.Sub64(_z[9], 12559350142230161220, b) + _, b = bits.Sub64(_z[10], 8166725140723971175, b) + _, b = bits.Sub64(_z[11], 2176306597715141, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [96]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[4] = binary.BigEndian.Uint64(bytes[32:40]) + z[5] = binary.BigEndian.Uint64(bytes[40:48]) + z[6] = binary.BigEndian.Uint64(bytes[48:56]) + z[7] = binary.BigEndian.Uint64(bytes[56:64]) + z[8] = binary.BigEndian.Uint64(bytes[64:72]) + z[9] = binary.BigEndian.Uint64(bytes[72:80]) + z[10] = binary.BigEndian.Uint64(bytes[80:88]) + z[11] = binary.BigEndian.Uint64(bytes[88:96]) + z[11] %= 4352613195430282 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 
3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } + + return z, nil +} + +// One returns 1 (in montgommery form) +func One() Element { + var one Element + one.SetOne() + return one +} + +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 + + // z = z + q + z[0], carry = bits.Add64(z[0], 1, 0) + z[1], carry = bits.Add64(z[1], 3731203976813871104, carry) + z[2], carry = bits.Add64(z[2], 15039355238879481536, carry) + z[3], carry = bits.Add64(z[3], 4828608925799409630, carry) + z[4], carry = bits.Add64(z[4], 16326337093237622437, carry) + z[5], carry = bits.Add64(z[5], 756237273905161798, carry) + z[6], carry = bits.Add64(z[6], 16934317532427647658, carry) + z[7], carry = bits.Add64(z[7], 14755673041361585881, carry) + z[8], carry = bits.Add64(z[8], 18154628166362162086, carry) + z[9], carry = bits.Add64(z[9], 6671956210750770825, carry) + z[10], carry = bits.Add64(z[10], 16333450281447942351, carry) + z[11], _ = bits.Add64(z[11], 4352613195430282, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] = z[3]>>1 | z[4]<<63 + z[4] = z[4]>>1 | z[5]<<63 + z[5] = z[5]>>1 | z[6]<<63 + z[6] = z[6]>>1 | z[7]<<63 + z[7] = z[7]>>1 | z[8]<<63 + z[8] = z[8]>>1 | z[9]<<63 + z[9] = z[9]>>1 | z[10]<<63 + z[10] = z[10]>>1 | z[11]<<63 + z[11] >>= 1 + +} + +// API with assembly impl + +// Mul z = x * y mod q +// see https://hackmd.io/@zkteam/modular_multiplication 
+func (z *Element) Mul(x, y *Element) *Element { + mul(z, x, y) + return z +} + +// Square z = x * x mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Square(x *Element) *Element { + mul(z, x, x) + return z +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func (z *Element) FromMont() *Element { + fromMont(z) + return z +} + +// Add z = x + y mod q +func (z *Element) Add(x, y *Element) *Element { + add(z, x, y) + return z +} + +// Double z = x + x mod q, aka Lsh 1 +func (z *Element) Double(x *Element) *Element { + double(z, x) + return z +} + +// Sub z = x - y mod q +func (z *Element) Sub(x, y *Element) *Element { + sub(z, x, y) + return z +} + +// Neg z = q - x +func (z *Element) Neg(x *Element) *Element { + neg(z, x) + return z +} + +// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms + +func _mulGeneric(z, x, y *Element) { + + var t [12]uint64 + var c [3]uint64 + { + // round 0 + v := x[0] + c[1], c[0] = bits.Mul64(v, y[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd1(v, y[1], c[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd1(v, y[2], c[1]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd1(v, y[3], c[1]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd1(v, y[4], c[1]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd1(v, y[5], c[1]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd1(v, y[6], c[1]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd1(v, y[7], c[1]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd1(v, y[8], c[1]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd1(v, y[9], c[1]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + 
c[1], c[0] = madd1(v, y[10], c[1]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd1(v, y[11], c[1]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 1 + v := x[1] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 2 + v := x[2] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + 
c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 3 + v := x[3] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 4 + v := x[4] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 
+ c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 5 + v := x[5] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) 
+ c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 6 + v := x[6] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 7 + v := x[7] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 
4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 8 + v := x[8] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], 
c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 9 + v := x[9] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 10 + v := x[10] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], t[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], t[5] = 
madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], t[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], t[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], t[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], t[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + t[11], t[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + { + // round 11 + v := x[11] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 18446744073709551615 + c[2] = madd0(m, 1, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], z[0] = madd2(m, 3731203976813871104, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], z[1] = madd2(m, 15039355238879481536, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], z[2] = madd2(m, 4828608925799409630, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], z[3] = madd2(m, 16326337093237622437, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + c[2], z[4] = madd2(m, 756237273905161798, c[2], c[0]) + c[1], c[0] = madd2(v, y[6], c[1], t[6]) + c[2], z[5] = madd2(m, 16934317532427647658, c[2], c[0]) + c[1], c[0] = madd2(v, y[7], c[1], t[7]) + c[2], z[6] = madd2(m, 14755673041361585881, c[2], c[0]) + c[1], c[0] = madd2(v, y[8], c[1], t[8]) + c[2], z[7] = madd2(m, 18154628166362162086, c[2], c[0]) + c[1], c[0] = madd2(v, y[9], c[1], t[9]) + c[2], z[8] = madd2(m, 6671956210750770825, c[2], c[0]) + c[1], c[0] = madd2(v, y[10], c[1], t[10]) + c[2], z[9] = madd2(m, 16333450281447942351, c[2], c[0]) + c[1], c[0] = madd2(v, y[11], c[1], t[11]) + z[11], z[10] = madd3(m, 4352613195430282, c[0], c[2], c[1]) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 
6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [12]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 18446744073709551615 + c2 := madd0(m, 1, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 3731203976813871104, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 15039355238879481536, c2, c0) + c1, c0 = madd1(y, x[3], c1) + c2, t[2] = madd2(m, 4828608925799409630, c2, c0) + c1, c0 = madd1(y, x[4], c1) + c2, t[3] = madd2(m, 16326337093237622437, c2, c0) + c1, c0 = madd1(y, x[5], c1) + c2, t[4] = madd2(m, 756237273905161798, c2, c0) + c1, c0 = madd1(y, x[6], c1) + c2, t[5] = madd2(m, 16934317532427647658, c2, c0) + c1, c0 = madd1(y, x[7], c1) + c2, t[6] = madd2(m, 14755673041361585881, c2, c0) + c1, 
c0 = madd1(y, x[8], c1) + c2, t[7] = madd2(m, 18154628166362162086, c2, c0) + c1, c0 = madd1(y, x[9], c1) + c2, t[8] = madd2(m, 6671956210750770825, c2, c0) + c1, c0 = madd1(y, x[10], c1) + c2, t[9] = madd2(m, 16333450281447942351, c2, c0) + c1, c0 = madd1(y, x[11], c1) + t[11], t[10] = madd3(m, 4352613195430282, c0, c2, c1) + } + { + // round 1 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 2 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 3 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 
756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 4 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 5 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 6 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + 
c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 7 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 8 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 9 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 
16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 10 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, t[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, t[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, t[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, t[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, t[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, t[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, t[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, t[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, t[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, t[9] = madd2(m, 16333450281447942351, c2, t[10]) + t[11], t[10] = madd2(m, 4352613195430282, t[11], c2) + } + { + // round 11 + m := t[0] * 18446744073709551615 + c2 := madd0(m, 1, t[0]) + c2, z[0] = madd2(m, 3731203976813871104, c2, t[1]) + c2, z[1] = madd2(m, 15039355238879481536, c2, t[2]) + c2, z[2] = madd2(m, 4828608925799409630, c2, t[3]) + c2, z[3] = madd2(m, 16326337093237622437, c2, t[4]) + c2, z[4] = madd2(m, 756237273905161798, c2, t[5]) + c2, z[5] = madd2(m, 16934317532427647658, c2, t[6]) + c2, z[6] = madd2(m, 14755673041361585881, c2, t[7]) + c2, z[7] = madd2(m, 18154628166362162086, c2, t[8]) + c2, z[8] = madd2(m, 6671956210750770825, c2, t[9]) + c2, z[9] = madd2(m, 16333450281447942351, c2, t[10]) + z[11], z[10] = madd2(m, 4352613195430282, t[11], c2) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 
6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } +} + +func _fromMontGeneric(z *Element) { + // the following lines implement z = z * 1 + // with a modified CIOS montgomery multiplication + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, 
z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 
4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, 
z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, 
z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 18446744073709551615 + C := madd0(m, 1, z[0]) + C, z[0] = madd2(m, 3731203976813871104, z[1], C) + C, z[1] = madd2(m, 15039355238879481536, z[2], C) + C, z[2] = madd2(m, 4828608925799409630, z[3], C) + C, z[3] = madd2(m, 16326337093237622437, z[4], C) + C, z[4] = madd2(m, 756237273905161798, z[5], C) + C, z[5] = madd2(m, 16934317532427647658, z[6], C) + C, z[6] = madd2(m, 14755673041361585881, z[7], C) + C, z[7] = madd2(m, 18154628166362162086, z[8], C) + C, z[8] = madd2(m, 6671956210750770825, z[9], C) + C, z[9] = madd2(m, 16333450281447942351, z[10], C) + C, z[10] = madd2(m, 4352613195430282, z[11], C) + z[11] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && 
(z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } +} + +func _addGeneric(z, x, y *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], y[0], 0) + z[1], carry = bits.Add64(x[1], y[1], carry) + z[2], carry = bits.Add64(x[2], y[2], carry) + z[3], carry = bits.Add64(x[3], y[3], carry) + z[4], carry = bits.Add64(x[4], y[4], carry) + z[5], carry = bits.Add64(x[5], y[5], carry) + z[6], carry = bits.Add64(x[6], y[6], carry) + z[7], carry = bits.Add64(x[7], y[7], carry) + z[8], carry = bits.Add64(x[8], y[8], carry) + z[9], carry = bits.Add64(x[9], y[9], carry) + z[10], carry = bits.Add64(x[10], y[10], carry) + z[11], _ = bits.Add64(x[11], y[11], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && 
(z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } +} + +func _doubleGeneric(z, x *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], x[0], 0) + z[1], carry = bits.Add64(x[1], x[1], carry) + z[2], carry = bits.Add64(x[2], x[2], carry) + z[3], carry = bits.Add64(x[3], x[3], carry) + z[4], carry = bits.Add64(x[4], x[4], carry) + z[5], carry = bits.Add64(x[5], x[5], carry) + z[6], carry = bits.Add64(x[6], x[6], carry) + z[7], carry = bits.Add64(x[7], x[7], carry) + z[8], carry = bits.Add64(x[8], x[8], carry) + z[9], carry = bits.Add64(x[9], x[9], carry) + z[10], carry = bits.Add64(x[10], x[10], carry) + z[11], _ = bits.Add64(x[11], x[11], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && 
(z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } +} + +func _subGeneric(z, x, y *Element) { + var b uint64 + z[0], b = bits.Sub64(x[0], y[0], 0) + z[1], b = bits.Sub64(x[1], y[1], b) + z[2], b = bits.Sub64(x[2], y[2], b) + z[3], b = bits.Sub64(x[3], y[3], b) + z[4], b = bits.Sub64(x[4], y[4], b) + z[5], b = bits.Sub64(x[5], y[5], b) + z[6], b = bits.Sub64(x[6], y[6], b) + z[7], b = bits.Sub64(x[7], y[7], b) + z[8], b = bits.Sub64(x[8], y[8], b) + z[9], b = bits.Sub64(x[9], y[9], b) + z[10], b = bits.Sub64(x[10], y[10], b) + z[11], b = bits.Sub64(x[11], y[11], b) + if b != 0 { + var c uint64 + z[0], c = bits.Add64(z[0], 1, 0) + z[1], c = bits.Add64(z[1], 3731203976813871104, c) + z[2], c = bits.Add64(z[2], 15039355238879481536, c) + z[3], c = bits.Add64(z[3], 4828608925799409630, c) + z[4], c = bits.Add64(z[4], 16326337093237622437, c) + z[5], c = bits.Add64(z[5], 756237273905161798, c) + z[6], c = bits.Add64(z[6], 16934317532427647658, c) + z[7], c = bits.Add64(z[7], 14755673041361585881, c) + z[8], c = bits.Add64(z[8], 
18154628166362162086, c) + z[9], c = bits.Add64(z[9], 6671956210750770825, c) + z[10], c = bits.Add64(z[10], 16333450281447942351, c) + z[11], _ = bits.Add64(z[11], 4352613195430282, c) + } +} + +func _negGeneric(z, x *Element) { + if x.IsZero() { + z.SetZero() + return + } + var borrow uint64 + z[0], borrow = bits.Sub64(1, x[0], 0) + z[1], borrow = bits.Sub64(3731203976813871104, x[1], borrow) + z[2], borrow = bits.Sub64(15039355238879481536, x[2], borrow) + z[3], borrow = bits.Sub64(4828608925799409630, x[3], borrow) + z[4], borrow = bits.Sub64(16326337093237622437, x[4], borrow) + z[5], borrow = bits.Sub64(756237273905161798, x[5], borrow) + z[6], borrow = bits.Sub64(16934317532427647658, x[6], borrow) + z[7], borrow = bits.Sub64(14755673041361585881, x[7], borrow) + z[8], borrow = bits.Sub64(18154628166362162086, x[8], borrow) + z[9], borrow = bits.Sub64(6671956210750770825, x[9], borrow) + z[10], borrow = bits.Sub64(16333450281447942351, x[10], borrow) + z[11], _ = bits.Sub64(4352613195430282, x[11], borrow) +} + +func _reduceGeneric(z *Element) { + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = 
bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } +} + +func mulByConstant(z *Element, c uint8) { + switch c { + case 0: + z.SetZero() + return + case 1: + return + case 2: + z.Double(z) + return + case 3: + _z := *z + z.Double(z).Add(z, &_z) + case 5: + _z := *z + z.Double(z).Double(z).Add(z, &_z) + default: + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[11] != 0 { + return 704 + bits.Len64(z[11]) + } + if z[10] != 0 { + return 640 + bits.Len64(z[10]) + } + if z[9] != 0 { + return 576 + bits.Len64(z[9]) + } + if z[8] != 0 { + return 512 + bits.Len64(z[8]) + } + if z[7] != 0 { + return 448 + bits.Len64(z[7]) + } + if z[6] != 0 { + return 
384 + bits.Len64(z[6]) + } + if z[5] != 0 { + return 320 + bits.Len64(z[5]) + } + if z[4] != 0 { + return 256 + bits.Len64(z[4]) + } + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + +// Exp z = x^exponent mod q +func (z *Element) Exp(x Element, exponent *big.Int) *Element { + var bZero big.Int + if exponent.Cmp(&bZero) == 0 { + return z.SetOne() + } + + z.Set(&x) + + for i := exponent.BitLen() - 2; i >= 0; i-- { + z.Square(z) + if exponent.Bit(i) == 1 { + z.Mul(z, &x) + } + } + + return z +} + +// ToMont converts z to Montgomery form +// sets and returns z = z * r² +func (z *Element) ToMont() *Element { + return z.Mul(z, &rSquare) +} + +// ToRegular returns z in regular form (doesn't mutate z) +func (z Element) ToRegular() Element { + return *z.FromMont() +} + +// String returns the decimal representation of z as generated by +// z.Text(10). +func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. 
+func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[88:96], z[0]) + binary.BigEndian.PutUint64(b[80:88], z[1]) + binary.BigEndian.PutUint64(b[72:80], z[2]) + binary.BigEndian.PutUint64(b[64:72], z[3]) + binary.BigEndian.PutUint64(b[56:64], z[4]) + binary.BigEndian.PutUint64(b[48:56], z[5]) + binary.BigEndian.PutUint64(b[40:48], z[6]) + binary.BigEndian.PutUint64(b[32:40], z[7]) + binary.BigEndian.PutUint64(b[24:32], z[8]) + binary.BigEndian.PutUint64(b[16:24], z[9]) + binary.BigEndian.PutUint64(b[8:16], z[10]) + binary.BigEndian.PutUint64(b[0:8], z[11]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. 
+func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[88:96], _z[0]) + binary.BigEndian.PutUint64(res[80:88], _z[1]) + binary.BigEndian.PutUint64(res[72:80], _z[2]) + binary.BigEndian.PutUint64(res[64:72], _z[3]) + binary.BigEndian.PutUint64(res[56:64], _z[4]) + binary.BigEndian.PutUint64(res[48:56], _z[5]) + binary.BigEndian.PutUint64(res[40:48], _z[6]) + binary.BigEndian.PutUint64(res[32:40], _z[7]) + binary.BigEndian.PutUint64(res[24:32], _z[8]) + binary.BigEndian.PutUint64(res[16:24], _z[9]) + binary.BigEndian.PutUint64(res[8:16], _z[10]) + binary.BigEndian.PutUint64(res[0:8], _z[11]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. +func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = 
uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil +} + +// Legendre returns the Legendre symbol of z (either +1, -1, or 0.) +func (z *Element) Legendre() int { + var l Element + // z^((q-1)/2) + l.expByLegendreExp(*z) + + if l.IsZero() { + return 0 + } + + // if l == 1 + if (l[11] == 369351476012747) && (l[10] == 9468215855567529777) && (l[9] == 3108243834975866807) && (l[8] == 2055362399696866477) && (l[7] == 18366804658688562287) && (l[6] == 8643488375494563078) && (l[5] == 4799902015386277509) && (l[4] == 2720419343484222500) && (l[3] == 12241294279704278364) && (l[2] == 15160016368967634470) && (l[1] == 14463961505609547775) && (l[0] == 18446744073709547378) { + return 1 + } + return -1 +} + +// Sqrt z = √x mod q +// if the square root doesn't exist (x is not a square mod q) +// Sqrt leaves z unchanged and returns nil +func (z *Element) Sqrt(x *Element) *Element { + // q ≡ 1 (mod 4) + // see modSqrtTonelliShanks in math/big/int.go + // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + + var y, b, t, w Element + // w = x^((s-1)/2)) + w.expBySqrtExp(*x) + + // y = x^((s+1)/2)) = w * x + y.Mul(x, &w) + + // b = x^s = w * w * x = y * x + b.Mul(&w, &y) + + // g = nonResidue ^ s + var g = Element{ + 17302715199413996045, + 15077845457253267709, + 8842885729139027579, + 12189878420705505575, + 12380986790262239346, + 585111498723936856, + 
4947215576903759546, + 1186632482028566920, + 14543050817583235372, + 5644943604719368358, + 9440830989708189862, + 1039766423535362, + } + r := uint64(82) + + // compute legendre symbol + // t = x^((q-1)/2) = r-1 squaring of x^s + t = b + for i := uint64(0); i < r-1; i++ { + t.Square(&t) + } + if t.IsZero() { + return z.SetZero() + } + if !((t[11] == 369351476012747) && (t[10] == 9468215855567529777) && (t[9] == 3108243834975866807) && (t[8] == 2055362399696866477) && (t[7] == 18366804658688562287) && (t[6] == 8643488375494563078) && (t[5] == 4799902015386277509) && (t[4] == 2720419343484222500) && (t[3] == 12241294279704278364) && (t[2] == 15160016368967634470) && (t[1] == 14463961505609547775) && (t[0] == 18446744073709547378)) { + // t != 1, we don't have a square root + return nil + } + for { + var m uint64 + t = b + + // for t != 1 + for !((t[11] == 369351476012747) && (t[10] == 9468215855567529777) && (t[9] == 3108243834975866807) && (t[8] == 2055362399696866477) && (t[7] == 18366804658688562287) && (t[6] == 8643488375494563078) && (t[5] == 4799902015386277509) && (t[4] == 2720419343484222500) && (t[3] == 12241294279704278364) && (t[2] == 15160016368967634470) && (t[1] == 14463961505609547775) && (t[0] == 18446744073709547378)) { + t.Square(&t) + m++ + } + + if m == 0 { + return z.Set(&y) + } + // t = g^(2^(r-m-1)) mod q + ge := int(r - m - 1) + t = g + for ge > 0 { + t.Square(&t) + ge-- + } + + g.Square(&t) + y.Mul(&y, &t) + b.Mul(&b, &g) + r = m + } +} + +func max(a int, b int) int { + if a > b { + return a + } + return b +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := 
c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 16061512306393401370 +const inversionCorrectionFactorWord1 = 12469388396993975658 +const inversionCorrectionFactorWord2 = 12941199289357671440 +const inversionCorrectionFactorWord3 = 7124172912896157387 +const inversionCorrectionFactorWord4 = 7772575019676086033 +const inversionCorrectionFactorWord5 = 5410978411075096125 +const inversionCorrectionFactorWord6 = 15135850590536056079 +const inversionCorrectionFactorWord7 = 14366933837510102702 +const inversionCorrectionFactorWord8 = 17864238268145908760 +const inversionCorrectionFactorWord9 = 11845167622525040086 +const inversionCorrectionFactorWord10 = 12428223085045138512 +const inversionCorrectionFactorWord11 = 2992926161591192 + +const invIterationsN = 50 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x *Element) *Element { + if x.IsZero() { + z.SetZero() + return z + } + + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + qElementWord6, + qElementWord7, + qElementWord8, + qElementWord9, + qElementWord10, + qElementWord11, + } // b := q + + u := Element{1} + + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 + + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 
0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still + } + + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) + a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN) + a[5] = (a[5] >> approxLowBitsN) | ((a[6]) << approxHighBitsN) + a[6] = (a[6] >> approxLowBitsN) | ((a[7]) << approxHighBitsN) + a[7] = (a[7] >> approxLowBitsN) | ((a[8]) << approxHighBitsN) + a[8] = (a[8] >> approxLowBitsN) | ((a[9]) << approxHighBitsN) + a[9] = (a[9] >> approxLowBitsN) | ((a[10]) << approxHighBitsN) + a[10] = (a[10] >> approxLowBitsN) | ((a[11]) << approxHighBitsN) + a[11] = (a[11] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) 
| ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) + b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) + b[5] = (b[5] >> approxLowBitsN) | ((b[6]) << approxHighBitsN) + b[6] = (b[6] >> approxLowBitsN) | ((b[7]) << approxHighBitsN) + b[7] = (b[7] >> approxLowBitsN) | ((b[8]) << approxHighBitsN) + b[8] = (b[8] >> approxLowBitsN) | ((b[9]) << approxHighBitsN) + b[9] = (b[9] >> approxLowBitsN) | ((b[10]) << approxHighBitsN) + b[10] = (b[10] >> approxLowBitsN) | ((b[11]) << approxHighBitsN) + b[11] = (b[11] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) + + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + inversionCorrectionFactorWord6, + inversionCorrectionFactorWord7, + 
inversionCorrectionFactorWord8, + inversionCorrectionFactorWord9, + inversionCorrectionFactorWord10, + inversionCorrectionFactorWord11, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. 
Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 + + m := x[0] * qInvNegLsw + + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + C, t[4] = madd2(m, qElementWord4, x[4], C) + C, t[5] = madd2(m, qElementWord5, x[5], C) + C, t[6] = madd2(m, qElementWord6, x[6], C) + C, t[7] = madd2(m, qElementWord7, x[7], C) + C, t[8] = madd2(m, qElementWord8, x[8], C) + C, t[9] = madd2(m, qElementWord9, x[9], C) + C, t[10] = madd2(m, qElementWord10, x[10], C) + C, t[11] = madd2(m, qElementWord11, x[11], C) + + // the high word of m * qElement[11] is at most 62 bits + // x[11] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[12] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, 
qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 3 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 4 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 5 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, 
qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 6 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 7 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 8 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = 
madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 9 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 10 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + C, t[i+6] = madd2(m, qElementWord6, t[i+6], C) + C, t[i+7] = madd2(m, qElementWord7, t[i+7], C) + C, t[i+8] = madd2(m, qElementWord8, t[i+8], C) + C, t[i+9] = madd2(m, qElementWord9, t[i+9], C) + C, t[i+10] = madd2(m, qElementWord10, t[i+10], C) + C, t[i+11] = madd2(m, qElementWord11, t[i+11], C) + + t[i+Limbs] += C + } + { + const i = 11 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + C, z[2] = 
madd2(m, qElementWord3, t[i+3], C) + C, z[3] = madd2(m, qElementWord4, t[i+4], C) + C, z[4] = madd2(m, qElementWord5, t[i+5], C) + C, z[5] = madd2(m, qElementWord6, t[i+6], C) + C, z[6] = madd2(m, qElementWord7, t[i+7], C) + C, z[7] = madd2(m, qElementWord8, t[i+8], C) + C, z[8] = madd2(m, qElementWord9, t[i+9], C) + C, z[9] = madd2(m, qElementWord10, t[i+10], C) + z[11], z[10] = madd2(m, qElementWord11, t[i+11], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[11] < 4352613195430282 || (z[11] == 4352613195430282 && (z[10] < 16333450281447942351 || (z[10] == 16333450281447942351 && (z[9] < 6671956210750770825 || (z[9] == 6671956210750770825 && (z[8] < 18154628166362162086 || (z[8] == 18154628166362162086 && (z[7] < 14755673041361585881 || (z[7] == 14755673041361585881 && (z[6] < 16934317532427647658 || (z[6] == 16934317532427647658 && (z[5] < 756237273905161798 || (z[5] == 756237273905161798 && (z[4] < 16326337093237622437 || (z[4] == 16326337093237622437 && (z[3] < 4828608925799409630 || (z[3] == 4828608925799409630 && (z[2] < 15039355238879481536 || (z[2] == 15039355238879481536 && (z[1] < 3731203976813871104 || (z[1] == 3731203976813871104 && (z[0] < 1))))))))))))))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 1, 0) + z[1], b = bits.Sub64(z[1], 3731203976813871104, b) + z[2], b = bits.Sub64(z[2], 15039355238879481536, b) + z[3], b = bits.Sub64(z[3], 4828608925799409630, b) + z[4], b = bits.Sub64(z[4], 16326337093237622437, b) + z[5], b = bits.Sub64(z[5], 756237273905161798, b) + z[6], b = bits.Sub64(z[6], 16934317532427647658, b) + z[7], b = bits.Sub64(z[7], 14755673041361585881, b) + z[8], b = bits.Sub64(z[8], 18154628166362162086, b) + z[9], b = bits.Sub64(z[9], 6671956210750770825, b) + z[10], b = bits.Sub64(z[10], 16333450281447942351, b) + z[11], _ = bits.Sub64(z[11], 4352613195430282, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], 
signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + z[4], b = bits.Sub64(z[4], 0, b) + z[5], b = bits.Sub64(z[5], 0, b) + z[6], b = bits.Sub64(z[6], 0, b) + z[7], b = bits.Sub64(z[7], 0, b) + z[8], b = bits.Sub64(z[8], 0, b) + z[9], b = bits.Sub64(z[9], 0, b) + z[10], b = bits.Sub64(z[10], 0, b) + z[11], b = bits.Sub64(z[11], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[11] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], b = bits.Add64(z[3], qElementWord3, b) + z[4], b = bits.Add64(z[4], qElementWord4, b) + z[5], b = bits.Add64(z[5], qElementWord5, b) + z[6], b = bits.Add64(z[6], qElementWord6, b) + z[7], b = bits.Add64(z[7], qElementWord7, b) + z[8], b = bits.Add64(z[8], qElementWord8, b) + z[9], b = bits.Add64(z[9], qElementWord9, b) + z[10], b = bits.Add64(z[10], qElementWord10, b) + z[11], _ = bits.Add64(neg1, qElementWord11, b) + } + } +} + +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} + +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 + + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + z[4], b = bits.Sub64(0, x[4], b) + z[5], b = bits.Sub64(0, x[5], b) + z[6], b = bits.Sub64(0, x[6], b) + z[7], b = bits.Sub64(0, x[7], b) + z[8], b = bits.Sub64(0, x[8], b) + z[9], b = bits.Sub64(0, x[9], b) + z[10], b = bits.Sub64(0, x[10], b) + z[11], b = bits.Sub64(0, x[11], b) + xHi, _ = bits.Sub64(0, xHi, b) + + return xHi +} + +// regular multiplication by one word regular (non montgomery) +// Fewer 
additions than the branch-free for positive y. Could be faster on some architectures
+func (z *Element) mulWRegular(x *Element, y int64) uint64 {
+
+	// w := abs(y) -- m is y's sign bit arithmetic-shifted across the word (0 or -1), so (y^m)-m == |y|
+	m := y >> 63
+	w := uint64((y ^ m) - m)
+
+	var c uint64
+	c, z[0] = bits.Mul64(x[0], w)
+	c, z[1] = madd1(x[1], w, c)
+	c, z[2] = madd1(x[2], w, c)
+	c, z[3] = madd1(x[3], w, c)
+	c, z[4] = madd1(x[4], w, c)
+	c, z[5] = madd1(x[5], w, c)
+	c, z[6] = madd1(x[6], w, c)
+	c, z[7] = madd1(x[7], w, c)
+	c, z[8] = madd1(x[8], w, c)
+	c, z[9] = madd1(x[9], w, c)
+	c, z[10] = madd1(x[10], w, c)
+	c, z[11] = madd1(x[11], w, c)
+
+	if y < 0 {
+		c = z.neg(z, c)
+	}
+
+	return c
+}
+
+/*
+Removed: seems slower
+// mulWRegular branch-free regular multiplication by one word (non montgomery)
+func (z *Element) mulWRegularBf(x *Element, y int64) uint64 {
+
+	w := uint64(y)
+	allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w
+
+	// s[0], s[1] so results are not stored immediately in z.
+	// x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z
+	var s [2]uint64
+	var h [2]uint64
+
+	h[0], s[0] = bits.Mul64(x[0], w)
+
+	c := uint64(0)
+	b := uint64(0)
+
+	{
+		const curI = 1 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 1 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[1], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 2 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 2 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[2], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 3 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 3 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[3], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 4 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 4 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[4], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 5 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 5 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[5], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 6 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 6 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[6], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 7 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 7 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[7], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 8 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 8 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[8], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 9 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 9 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[9], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 10 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 10 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[10], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+
+	{
+		const curI = 11 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 11 - 1
+
+		h[curI], s[curI] = bits.Mul64(x[11], w)
+		s[curI], c = bits.Add64(s[curI], h[prevI], c)
+		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+	}
+	{
+		const curI = 12 % 2
+		const prevI = 1 - curI
+		const iMinusOne = 11
+
+		s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b)
+		z[iMinusOne] = s[prevI]
+
+		return s[curI] + c
+	}
+}*/
+
+// Requires NoCarry; z = x*xC + y*yC without reduction, returning the overflow word of the 12-limb sum
+func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 {
+	var yTimes Element
+
+	yHi := yTimes.mulWRegular(y, yC)
+	xHi := z.mulWRegular(x, xC)
+
+	carry := uint64(0)
+	z[0], carry = bits.Add64(z[0], yTimes[0], carry)
+	z[1], carry = bits.Add64(z[1], yTimes[1], carry)
+	z[2], carry = bits.Add64(z[2], yTimes[2], carry)
+	z[3], carry = bits.Add64(z[3], yTimes[3], carry)
+	z[4], carry = bits.Add64(z[4], yTimes[4], carry)
+	z[5], carry = bits.Add64(z[5], yTimes[5], carry)
+	z[6], carry = bits.Add64(z[6], yTimes[6], carry)
+	z[7], carry = bits.Add64(z[7], yTimes[7], carry)
+	z[8], carry = bits.Add64(z[8], yTimes[8], carry)
+	z[9], carry = bits.Add64(z[9], yTimes[9], carry)
+	z[10], carry = bits.Add64(z[10], yTimes[10], carry)
+	z[11], carry = bits.Add64(z[11], yTimes[11], carry)
+
+	yHi, _ = bits.Add64(xHi, yHi, carry)
+
+	return yHi
+}
diff --git a/ecc/bw6-756/fp/element_exp.go b/ecc/bw6-756/fp/element_exp.go
new file mode 100644
index 000000000..af741d643
--- /dev/null
+++ b/ecc/bw6-756/fp/element_exp.go
@@ -0,0 +1,1993 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// expBySqrtExp is equivalent to z.Exp(x, 1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _100 = 1 + _11 + // _101 = 1 + _100 + // _1001 = _100 + _101 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _10001 = _10 + _1111 + // _10101 = _100 + _10001 + // _10111 = _10 + _10101 + // _11001 = _10 + _10111 + // _11011 = _10 + _11001 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _100001 = _10 + _11111 + // _100011 = _10 + _100001 + // _100101 = _10 + _100011 + // _100111 = _10 + _100101 + // _101001 = _10 + _100111 + // _101011 = _10 + _101001 + // _101101 = _10 + _101011 + // _101111 = _10 + _101101 + // _110001 = _10 + _101111 + // _110011 = _10 + _110001 + // _110101 = _10 + _110011 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111010 = _111011 + _111111 + // i52 = ((_1111010 << 4 + _11011) << 7 + _101011) << 7 + // i67 = ((_110111 + i52) << 7 + _110101) << 5 + _10111 + // i87 = ((i67 << 7 + _111001) << 5 + _10001) << 6 + // i101 = ((_10111 + i87) << 8 + _10101) << 3 + _11 + // i128 = ((i101 << 9 + _1001) << 8 + _111111) << 8 + // i145 = ((_1111 + i128) << 9 + _110101) << 5 + _1101 + // i167 = ((i145 << 9 + _110011) << 6 + _110101) << 5 + // i187 = ((_11001 + i167) << 8 + _101111) << 9 + _110011 + // i205 = ((i187 << 7 + _100101) << 6 + _111101) << 3 + // i223 = ((_11 + i205) << 8 + _1011) << 7 + _11101 + // i244 = ((i223 << 9 + _100111) << 6 + _111011) << 4 + // i262 = ((_1111 + i244) << 8 + _100011) << 7 + _10001 + // 
i285 = ((i262 << 7 + _101) << 8 + _10101) << 6 + // i299 = ((_10001 + i285) << 7 + _110001) << 4 + _1101 + // i325 = ((i299 << 7 + _11011) << 8 + _110011) << 9 + // i341 = ((_110101 + i325) << 7 + _111001) << 6 + _110011 + // i366 = ((i341 << 6 + _110001) << 9 + _10101) << 8 + // i383 = ((_100011 + i366) << 6 + _11011) << 8 + _111101 + // i401 = ((i383 << 3 + _11) << 10 + _1011) << 3 + // i422 = ((1 + i401) << 12 + _100101) << 6 + _110101 + // i448 = ((i422 << 12 + _100111) << 6 + _110101) << 6 + // i467 = ((_10101 + i448) << 11 + _101001) << 5 + _11111 + // i490 = ((i467 << 5 + _1011) << 9 + _111001) << 7 + // i508 = ((_110011 + i490) << 4 + _1101) << 11 + _110111 + // i535 = ((i508 << 7 + _11001) << 9 + _110111) << 9 + // i550 = ((_101001 + i535) << 6 + _1011) << 6 + _1101 + // i572 = ((i550 << 9 + _101011) << 5 + _11011) << 6 + // i590 = ((_11011 + i572) << 6 + _11001) << 9 + _110101 + // i616 = ((i590 << 7 + _10101) << 6 + _11) << 11 + // i630 = ((_10101 + i616) << 4 + _101) << 7 + _1111 + // i653 = ((i630 << 10 + _100101) << 6 + _100011) << 5 + // i670 = ((_1111 + i653) << 7 + _11111) << 7 + _111101 + // i688 = ((i670 << 3 + _101) << 10 + _101101) << 3 + // i708 = ((_101 + i688) << 10 + _101111) << 7 + _100001 + // i731 = ((i708 << 3 + _101) << 10 + _101001) << 8 + // i751 = ((_100111 + i731) << 3 + _11) << 14 + _110011 + // i768 = ((i751 << 6 + _110001) << 5 + _11111) << 4 + // i781 = 2*((_11 + i768) << 9 + _111111) + 1 + // return (i781 << 8 + _1101) << 4 + // + // Operations: 667 squares 127 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + t19 = new(Element) + t20 = new(Element) + t21 = new(Element) + t22 = new(Element) + t23 = new(Element) + t24 = new(Element) + t25 = new(Element) + t26 = new(Element) + t27 = new(Element) + t28 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24,t25,t26,t27,t28 Element + // Step 1: t0 = x^0x2 + t0.Square(&x) + + // Step 2: t1 = x^0x3 + t1.Mul(&x, t0) + + // Step 3: t2 = x^0x4 + t2.Mul(&x, t1) + + // Step 4: t7 = x^0x5 + t7.Mul(&x, t2) + + // Step 5: t26 = x^0x9 + t26.Mul(t2, t7) + + // Step 6: t20 = x^0xb + t20.Mul(t0, t26) + + // Step 7: z = x^0xd + z.Mul(t0, t20) + + // Step 8: t12 = x^0xf + t12.Mul(t0, z) + + // Step 9: t23 = x^0x11 + t23.Mul(t0, t12) + + // Step 10: t15 = x^0x15 + t15.Mul(t2, t23) + + // Step 11: t27 = x^0x17 + t27.Mul(t0, t15) + + // Step 12: t17 = x^0x19 + t17.Mul(t0, t27) + + // Step 13: t18 = x^0x1b + t18.Mul(t0, t17) + + // Step 14: t25 = x^0x1d + t25.Mul(t0, t18) + + // Step 15: t2 = x^0x1f + t2.Mul(t0, t25) + + // Step 16: t8 = x^0x21 + t8.Mul(t0, t2) + + // Step 17: t13 = x^0x23 + t13.Mul(t0, t8) + + // Step 18: t14 = x^0x25 + t14.Mul(t0, t13) + + // Step 19: t5 = x^0x27 + t5.Mul(t0, t14) + + // Step 20: t6 = x^0x29 + t6.Mul(t0, t5) + + // Step 21: t19 = x^0x2b + t19.Mul(t0, t6) + + // Step 22: t10 = x^0x2d + t10.Mul(t0, t19) + + // Step 23: t9 = x^0x2f + t9.Mul(t0, t10) + + // Step 24: t3 = x^0x31 + t3.Mul(t0, t9) + + // Step 25: t4 = x^0x33 + t4.Mul(t0, t3) + + // Step 26: t16 = x^0x35 + t16.Mul(t0, t4) + + // Step 27: t21 = x^0x37 + t21.Mul(t0, t16) + + // Step 28: t22 = 
x^0x39 + t22.Mul(t0, t21) + + // Step 29: t24 = x^0x3b + t24.Mul(t0, t22) + + // Step 30: t11 = x^0x3d + t11.Mul(t0, t24) + + // Step 31: t0 = x^0x3f + t0.Mul(t0, t11) + + // Step 32: t28 = x^0x7a + t28.Mul(t24, t0) + + // Step 36: t28 = x^0x7a0 + for s := 0; s < 4; s++ { + t28.Square(t28) + } + + // Step 37: t28 = x^0x7bb + t28.Mul(t18, t28) + + // Step 44: t28 = x^0x3dd80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 45: t28 = x^0x3ddab + t28.Mul(t19, t28) + + // Step 52: t28 = x^0x1eed580 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 53: t28 = x^0x1eed5b7 + t28.Mul(t21, t28) + + // Step 60: t28 = x^0xf76adb80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 61: t28 = x^0xf76adbb5 + t28.Mul(t16, t28) + + // Step 66: t28 = x^0x1eed5b76a0 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 67: t28 = x^0x1eed5b76b7 + t28.Mul(t27, t28) + + // Step 74: t28 = x^0xf76adbb5b80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 75: t28 = x^0xf76adbb5bb9 + t28.Mul(t22, t28) + + // Step 80: t28 = x^0x1eed5b76b7720 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 81: t28 = x^0x1eed5b76b7731 + t28.Mul(t23, t28) + + // Step 87: t28 = x^0x7bb56ddaddcc40 + for s := 0; s < 6; s++ { + t28.Square(t28) + } + + // Step 88: t27 = x^0x7bb56ddaddcc57 + t27.Mul(t27, t28) + + // Step 96: t27 = x^0x7bb56ddaddcc5700 + for s := 0; s < 8; s++ { + t27.Square(t27) + } + + // Step 97: t27 = x^0x7bb56ddaddcc5715 + t27.Mul(t15, t27) + + // Step 100: t27 = x^0x3ddab6ed6ee62b8a8 + for s := 0; s < 3; s++ { + t27.Square(t27) + } + + // Step 101: t27 = x^0x3ddab6ed6ee62b8ab + t27.Mul(t1, t27) + + // Step 110: t27 = x^0x7bb56ddaddcc5715600 + for s := 0; s < 9; s++ { + t27.Square(t27) + } + + // Step 111: t26 = x^0x7bb56ddaddcc5715609 + t26.Mul(t26, t27) + + // Step 119: t26 = x^0x7bb56ddaddcc571560900 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 120: t26 = x^0x7bb56ddaddcc57156093f + t26.Mul(t0, t26) + + // Step 
128: t26 = x^0x7bb56ddaddcc57156093f00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 129: t26 = x^0x7bb56ddaddcc57156093f0f + t26.Mul(t12, t26) + + // Step 138: t26 = x^0xf76adbb5bb98ae2ac127e1e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 139: t26 = x^0xf76adbb5bb98ae2ac127e1e35 + t26.Mul(t16, t26) + + // Step 144: t26 = x^0x1eed5b76b77315c55824fc3c6a0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 145: t26 = x^0x1eed5b76b77315c55824fc3c6ad + t26.Mul(z, t26) + + // Step 154: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 155: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33 + t26.Mul(t4, t26) + + // Step 161: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cc0 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 162: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cf5 + t26.Mul(t16, t26) + + // Step 167: t26 = x^0x1eed5b76b77315c55824fc3c6ad19ea0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 168: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb9 + t26.Mul(t17, t26) + + // Step 176: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb900 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 177: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f + t26.Mul(t9, t26) + + // Step 186: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 187: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e33 + t26.Mul(t4, t26) + + // Step 194: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f1980 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 195: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5 + t26.Mul(t14, t26) + + // Step 201: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc66940 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 202: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d + t26.Mul(t11, t26) + + // Step 205: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334be8 + for s := 0; s < 3; s++ { + t26.Square(t26) + } + + // Step 206: t26 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb + t26.Mul(t1, t26) + + // Step 214: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 215: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b + t26.Mul(t20, t26) + + // Step 222: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f58580 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 223: t25 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d + t25.Mul(t25, t26) + + // Step 232: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a00 + for s := 0; s < 9; s++ { + t25.Square(t25) + } + + // Step 233: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27 + t25.Mul(t5, t25) + + // Step 239: t25 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89c0 + for s := 0; s < 6; s++ { + t25.Square(t25) + } + + // Step 240: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb + t24.Mul(t24, t25) + + // Step 244: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb0 + for s := 0; s < 4; s++ { + t24.Square(t24) + } + + // Step 245: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf + t24.Mul(t12, t24) + + // Step 253: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf00 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 254: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23 + t24.Mul(t13, t24) + + // Step 261: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9180 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 262: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9191 + t24.Mul(t23, t24) + + // Step 269: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c880 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 270: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c885 + t24.Mul(t7, t24) + + // Step 278: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88500 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 279: t24 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515 + t24.Mul(t15, t24) + + // Step 285: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214540 + for s := 0; s < 6; s++ { + t24.Square(t24) + } + + // Step 286: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214551 + t23.Mul(t23, t24) + + // Step 293: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a880 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 294: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1 + t23.Mul(t3, t23) + + // Step 298: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b10 + for s := 0; s < 4; s++ { + t23.Square(t23) + } + + // Step 299: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d + t23.Mul(z, t23) + + // Step 306: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 307: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b + t23.Mul(t18, t23) + + // Step 315: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b00 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 316: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b33 + t23.Mul(t4, t23) + + // Step 325: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 326: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635 + t23.Mul(t16, t23) + + // Step 333: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331a80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 334: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9 + t23.Mul(t22, t23) + + // Step 340: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 341: t23 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73 + t23.Mul(t4, t23) + + // Step 347: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cc0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 348: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf1 + t23.Mul(t3, t23) + + // Step 357: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e200 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 358: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215 + t23.Mul(t15, t23) + + // Step 366: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21500 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 367: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21523 + t23.Mul(t13, t23) + + // Step 373: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 374: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db + t23.Mul(t18, t23) + + // Step 382: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db00 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 383: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d + t23.Mul(t11, t23) + + // Step 386: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9e8 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 387: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb + t23.Mul(t1, t23) + + // Step 397: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac00 + for s := 0; s < 10; s++ { + t23.Square(t23) + } + + // Step 398: t23 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b + t23.Mul(t20, t23) + + // Step 401: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6058 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 402: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059 + t23.Mul(&x, t23) + + // Step 414: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 415: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025 + t23.Mul(t14, t23) + + // Step 421: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640940 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 422: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975 + t23.Mul(t16, t23) + + // Step 434: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 435: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027 + t23.Mul(t5, t23) + + // Step 441: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 442: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f5 + t23.Mul(t16, t23) + + // Step 448: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 449: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55 + t23.Mul(t15, t23) + + // Step 460: t23 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa800 + for s := 0; s < 11; s++ { + t23.Square(t23) + } + + // Step 461: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829 + t23.Mul(t6, t23) + + // Step 466: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d550520 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 467: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f + t23.Mul(t2, t23) + + // Step 472: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7e0 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 473: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb + t23.Mul(t20, t23) + + // Step 482: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 483: t22 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd639 + t22.Mul(t22, t23) + + // Step 490: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1c80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 491: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3 + t22.Mul(t4, t22) + + // Step 495: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb30 + for s := 0; s < 4; s++ { + t22.Square(t22) + } + + // Step 496: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d + t22.Mul(z, t22) + + // Step 507: t22 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e800 + for s := 0; s < 11; s++ { + t22.Square(t22) + } + + // Step 508: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e837 + t22.Mul(t21, t22) + + // Step 515: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 516: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b99 + t22.Mul(t17, t22) + + // Step 525: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373200 + for s := 0; s < 9; s++ { + t22.Square(t22) + } + + // Step 526: t21 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237 + t21.Mul(t21, t22) + + // Step 535: t21 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e00 + for s := 0; s < 9; s++ { + t21.Square(t21) + } + + // Step 536: t21 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e29 + t21.Mul(t6, t21) + + // Step 542: t21 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a40 + for s := 0; s < 6; s++ { + t21.Square(t21) + } + + // Step 543: t20 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b + t20.Mul(t20, t21) + + // Step 549: t20 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292c0 + for s := 0; s < 6; s++ { + t20.Square(t20) + } + + // Step 550: t20 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd + t20.Mul(z, t20) + + // Step 559: t20 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a00 + for s := 0; s < 9; s++ { + t20.Square(t20) + } + + // Step 560: t19 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2b + t19.Mul(t19, t20) + + // Step 565: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b34560 + for s := 0; s < 5; s++ { + t19.Square(t19) + } + + // Step 566: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b + t19.Mul(t18, t19) + + // Step 572: t19 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15ec0 + for s := 0; s < 6; s++ { + t19.Square(t19) + } + + // Step 573: t18 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb + t18.Mul(t18, t19) + + // Step 579: t18 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 580: t17 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d9 + t17.Mul(t17, t18) + + // Step 589: t17 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db200 + for s := 0; s < 9; s++ { + t17.Square(t17) + } + + // Step 590: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db235 + t16.Mul(t16, t17) + + // Step 597: t16 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a80 + for s := 0; s < 7; s++ { + t16.Square(t16) + } + + // Step 598: t16 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a95 + t16.Mul(t15, t16) + + // Step 604: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a540 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 605: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a543 + t16.Mul(t1, t16) + + // Step 616: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1800 + for s := 0; s < 11; s++ { + t16.Square(t16) + } + + // Step 617: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1815 + t15.Mul(t15, t16) + + // Step 621: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18150 + for s := 0; s < 4; s++ { + t15.Square(t15) + } + + // Step 622: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18155 + t15.Mul(t7, t15) + + // Step 629: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa80 + for s := 0; s < 7; s++ { + t15.Square(t15) + } + + // Step 630: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f + t15.Mul(t12, t15) + + // Step 640: t15 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c00 + for s := 0; s < 10; s++ { + t15.Square(t15) + } + + // Step 641: t14 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c25 + t14.Mul(t14, t15) + + // Step 647: t14 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0940 + for s := 0; s < 6; s++ { + t14.Square(t14) + } + + // Step 648: t13 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963 + t13.Mul(t13, t14) + + // Step 653: t13 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c60 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 654: t12 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f + t12.Mul(t12, t13) + + // Step 661: t12 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963780 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 662: t12 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f + t12.Mul(t2, t12) + + // Step 669: t12 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcf80 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 670: t11 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbd + t11.Mul(t11, t12) + + // Step 
673: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7de8 + for s := 0; s < 3; s++ { + t11.Square(t11) + } + + // Step 674: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded + t11.Mul(t7, t11) + + // Step 684: t11 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b400 + for s := 0; s < 10; s++ { + t11.Square(t11) + } + + // Step 685: t10 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42d + t10.Mul(t10, t11) + + // Step 688: t10 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda168 + for s := 0; s < 3; s++ { + t10.Square(t10) + } + + // Step 689: t10 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d + t10.Mul(t7, t10) + + // Step 699: t10 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b400 + for s := 0; s < 10; s++ { + t10.Square(t10) + } + + // Step 700: t9 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f + t9.Mul(t9, t10) + + // Step 707: t9 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da1780 + for s := 0; s < 7; s++ { + t9.Square(t9) + } + + // Step 708: t8 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1 + t8.Mul(t8, t9) + + // Step 711: t8 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd08 + for s := 0; s < 3; s++ { + t8.Square(t8) + } + + // Step 712: t7 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d + t7.Mul(t7, t8) + + // Step 722: t7 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43400 + for s := 0; s < 10; s++ { + t7.Square(t7) + } + + // Step 723: t6 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429 + t6.Mul(t6, t7) + + // Step 731: t6 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342900 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 732: t5 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342927 + t5.Mul(t5, t6) + + // Step 735: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a14938 + for s := 0; s < 3; s++ { + t5.Square(t5) + } + + // Step 736: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b + t5.Mul(t1, t5) + + // Step 750: t5 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec000 + for s := 0; s < 14; s++ { + t5.Square(t5) + } + + // Step 751: t4 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033 + t4.Mul(t4, t5) + + // Step 757: t4 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cc0 + for s := 0; s < 6; s++ { + t4.Square(t4) + } + + // Step 758: t3 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1 + t3.Mul(t3, t4) + + // Step 763: t3 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e20 + for s := 0; s < 5; s++ { + t3.Square(t3) + } + + // Step 764: t2 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f + t2.Mul(t2, t3) + + // Step 768: t2 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f0 + for s := 0; s < 4; s++ { + t2.Square(t2) + } + + // Step 769: t1 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f3 + t1.Mul(t1, t2) + + // Step 778: t1 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e600 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 779: t0 
= x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f + t0.Mul(t0, t1) + + // Step 780: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7e + t0.Square(t0) + + // Step 781: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f + t0.Mul(&x, t0) + + // Step 789: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f00 + for s := 0; s < 8; s++ { + t0.Square(t0) + } + + // Step 790: z = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d + z.Mul(z, t0) + + // Step 794: z = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 + for s := 0; s < 4; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _100 = 1 + _11 + // _101 = 1 + _100 + // _1001 = _100 + _101 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _10001 = _10 + _1111 + // _10101 = _100 + _10001 + // _10111 = _10 + _10101 + // _11001 = 
_10 + _10111 + // _11011 = _10 + _11001 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _100001 = _10 + _11111 + // _100011 = _10 + _100001 + // _100101 = _10 + _100011 + // _100111 = _10 + _100101 + // _101001 = _10 + _100111 + // _101011 = _10 + _101001 + // _101101 = _10 + _101011 + // _101111 = _10 + _101101 + // _110001 = _10 + _101111 + // _110011 = _10 + _110001 + // _110101 = _10 + _110011 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111010 = _111011 + _111111 + // i52 = ((_1111010 << 4 + _11011) << 7 + _101011) << 7 + // i67 = ((_110111 + i52) << 7 + _110101) << 5 + _10111 + // i87 = ((i67 << 7 + _111001) << 5 + _10001) << 6 + // i101 = ((_10111 + i87) << 8 + _10101) << 3 + _11 + // i128 = ((i101 << 9 + _1001) << 8 + _111111) << 8 + // i145 = ((_1111 + i128) << 9 + _110101) << 5 + _1101 + // i167 = ((i145 << 9 + _110011) << 6 + _110101) << 5 + // i187 = ((_11001 + i167) << 8 + _101111) << 9 + _110011 + // i205 = ((i187 << 7 + _100101) << 6 + _111101) << 3 + // i223 = ((_11 + i205) << 8 + _1011) << 7 + _11101 + // i244 = ((i223 << 9 + _100111) << 6 + _111011) << 4 + // i262 = ((_1111 + i244) << 8 + _100011) << 7 + _10001 + // i285 = ((i262 << 7 + _101) << 8 + _10101) << 6 + // i299 = ((_10001 + i285) << 7 + _110001) << 4 + _1101 + // i325 = ((i299 << 7 + _11011) << 8 + _110011) << 9 + // i341 = ((_110101 + i325) << 7 + _111001) << 6 + _110011 + // i366 = ((i341 << 6 + _110001) << 9 + _10101) << 8 + // i383 = ((_100011 + i366) << 6 + _11011) << 8 + _111101 + // i401 = ((i383 << 3 + _11) << 10 + _1011) << 3 + // i422 = ((1 + i401) << 12 + _100101) << 6 + _110101 + // i448 = ((i422 << 12 + _100111) << 6 + _110101) << 6 + // i467 = ((_10101 + i448) << 11 + _101001) << 5 + _11111 + // i490 = ((i467 << 5 + _1011) << 9 + _111001) << 7 + // i508 = ((_110011 + i490) << 4 + _1101) << 11 + _110111 + // i535 = ((i508 << 7 + _11001) << 9 + _110111) << 9 
+ // i550 = ((_101001 + i535) << 6 + _1011) << 6 + _1101 + // i572 = ((i550 << 9 + _101011) << 5 + _11011) << 6 + // i590 = ((_11011 + i572) << 6 + _11001) << 9 + _110101 + // i616 = ((i590 << 7 + _10101) << 6 + _11) << 11 + // i630 = ((_10101 + i616) << 4 + _101) << 7 + _1111 + // i653 = ((i630 << 10 + _100101) << 6 + _100011) << 5 + // i670 = ((_1111 + i653) << 7 + _11111) << 7 + _111101 + // i688 = ((i670 << 3 + _101) << 10 + _101101) << 3 + // i708 = ((_101 + i688) << 10 + _101111) << 7 + _100001 + // i731 = ((i708 << 3 + _101) << 10 + _101001) << 8 + // i751 = ((_100111 + i731) << 3 + _11) << 14 + _110011 + // i768 = ((i751 << 6 + _110001) << 5 + _11111) << 4 + // i781 = 2*((_11 + i768) << 9 + _111111) + 1 + // return ((i781 << 8 + _1101) << 5 + 1) << 81 + // + // Operations: 749 squares 128 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + t19 = new(Element) + t20 = new(Element) + t21 = new(Element) + t22 = new(Element) + t23 = new(Element) + t24 = new(Element) + t25 = new(Element) + t26 = new(Element) + t27 = new(Element) + t28 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21,t22,t23,t24,t25,t26,t27,t28 Element + // Step 1: t0 = x^0x2 + t0.Square(&x) + + // Step 2: t1 = x^0x3 + t1.Mul(&x, t0) + + // Step 3: t2 = x^0x4 + t2.Mul(&x, t1) + + // Step 4: t7 = x^0x5 + t7.Mul(&x, t2) + + // Step 5: t26 = x^0x9 + t26.Mul(t2, t7) + + // Step 6: t20 = x^0xb + t20.Mul(t0, t26) + + // Step 7: z = x^0xd + z.Mul(t0, t20) + + // Step 8: t12 = x^0xf + t12.Mul(t0, z) + + // Step 9: t23 = x^0x11 + t23.Mul(t0, t12) + + // Step 10: t15 = 
x^0x15 + t15.Mul(t2, t23) + + // Step 11: t27 = x^0x17 + t27.Mul(t0, t15) + + // Step 12: t17 = x^0x19 + t17.Mul(t0, t27) + + // Step 13: t18 = x^0x1b + t18.Mul(t0, t17) + + // Step 14: t25 = x^0x1d + t25.Mul(t0, t18) + + // Step 15: t2 = x^0x1f + t2.Mul(t0, t25) + + // Step 16: t8 = x^0x21 + t8.Mul(t0, t2) + + // Step 17: t13 = x^0x23 + t13.Mul(t0, t8) + + // Step 18: t14 = x^0x25 + t14.Mul(t0, t13) + + // Step 19: t5 = x^0x27 + t5.Mul(t0, t14) + + // Step 20: t6 = x^0x29 + t6.Mul(t0, t5) + + // Step 21: t19 = x^0x2b + t19.Mul(t0, t6) + + // Step 22: t10 = x^0x2d + t10.Mul(t0, t19) + + // Step 23: t9 = x^0x2f + t9.Mul(t0, t10) + + // Step 24: t3 = x^0x31 + t3.Mul(t0, t9) + + // Step 25: t4 = x^0x33 + t4.Mul(t0, t3) + + // Step 26: t16 = x^0x35 + t16.Mul(t0, t4) + + // Step 27: t21 = x^0x37 + t21.Mul(t0, t16) + + // Step 28: t22 = x^0x39 + t22.Mul(t0, t21) + + // Step 29: t24 = x^0x3b + t24.Mul(t0, t22) + + // Step 30: t11 = x^0x3d + t11.Mul(t0, t24) + + // Step 31: t0 = x^0x3f + t0.Mul(t0, t11) + + // Step 32: t28 = x^0x7a + t28.Mul(t24, t0) + + // Step 36: t28 = x^0x7a0 + for s := 0; s < 4; s++ { + t28.Square(t28) + } + + // Step 37: t28 = x^0x7bb + t28.Mul(t18, t28) + + // Step 44: t28 = x^0x3dd80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 45: t28 = x^0x3ddab + t28.Mul(t19, t28) + + // Step 52: t28 = x^0x1eed580 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 53: t28 = x^0x1eed5b7 + t28.Mul(t21, t28) + + // Step 60: t28 = x^0xf76adb80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 61: t28 = x^0xf76adbb5 + t28.Mul(t16, t28) + + // Step 66: t28 = x^0x1eed5b76a0 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 67: t28 = x^0x1eed5b76b7 + t28.Mul(t27, t28) + + // Step 74: t28 = x^0xf76adbb5b80 + for s := 0; s < 7; s++ { + t28.Square(t28) + } + + // Step 75: t28 = x^0xf76adbb5bb9 + t28.Mul(t22, t28) + + // Step 80: t28 = x^0x1eed5b76b7720 + for s := 0; s < 5; s++ { + t28.Square(t28) + } + + // Step 81: t28 = 
x^0x1eed5b76b7731 + t28.Mul(t23, t28) + + // Step 87: t28 = x^0x7bb56ddaddcc40 + for s := 0; s < 6; s++ { + t28.Square(t28) + } + + // Step 88: t27 = x^0x7bb56ddaddcc57 + t27.Mul(t27, t28) + + // Step 96: t27 = x^0x7bb56ddaddcc5700 + for s := 0; s < 8; s++ { + t27.Square(t27) + } + + // Step 97: t27 = x^0x7bb56ddaddcc5715 + t27.Mul(t15, t27) + + // Step 100: t27 = x^0x3ddab6ed6ee62b8a8 + for s := 0; s < 3; s++ { + t27.Square(t27) + } + + // Step 101: t27 = x^0x3ddab6ed6ee62b8ab + t27.Mul(t1, t27) + + // Step 110: t27 = x^0x7bb56ddaddcc5715600 + for s := 0; s < 9; s++ { + t27.Square(t27) + } + + // Step 111: t26 = x^0x7bb56ddaddcc5715609 + t26.Mul(t26, t27) + + // Step 119: t26 = x^0x7bb56ddaddcc571560900 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 120: t26 = x^0x7bb56ddaddcc57156093f + t26.Mul(t0, t26) + + // Step 128: t26 = x^0x7bb56ddaddcc57156093f00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 129: t26 = x^0x7bb56ddaddcc57156093f0f + t26.Mul(t12, t26) + + // Step 138: t26 = x^0xf76adbb5bb98ae2ac127e1e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 139: t26 = x^0xf76adbb5bb98ae2ac127e1e35 + t26.Mul(t16, t26) + + // Step 144: t26 = x^0x1eed5b76b77315c55824fc3c6a0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 145: t26 = x^0x1eed5b76b77315c55824fc3c6ad + t26.Mul(z, t26) + + // Step 154: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 155: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33 + t26.Mul(t4, t26) + + // Step 161: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cc0 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 162: t26 = x^0xf76adbb5bb98ae2ac127e1e3568cf5 + t26.Mul(t16, t26) + + // Step 167: t26 = x^0x1eed5b76b77315c55824fc3c6ad19ea0 + for s := 0; s < 5; s++ { + t26.Square(t26) + } + + // Step 168: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb9 + t26.Mul(t17, t26) + + // Step 176: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb900 + for s := 0; s < 8; s++ { 
+ t26.Square(t26) + } + + // Step 177: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f + t26.Mul(t9, t26) + + // Step 186: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e00 + for s := 0; s < 9; s++ { + t26.Square(t26) + } + + // Step 187: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e33 + t26.Mul(t4, t26) + + // Step 194: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f1980 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 195: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5 + t26.Mul(t14, t26) + + // Step 201: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc66940 + for s := 0; s < 6; s++ { + t26.Square(t26) + } + + // Step 202: t26 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d + t26.Mul(t11, t26) + + // Step 205: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334be8 + for s := 0; s < 3; s++ { + t26.Square(t26) + } + + // Step 206: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb + t26.Mul(t1, t26) + + // Step 214: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb00 + for s := 0; s < 8; s++ { + t26.Square(t26) + } + + // Step 215: t26 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b + t26.Mul(t20, t26) + + // Step 222: t26 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f58580 + for s := 0; s < 7; s++ { + t26.Square(t26) + } + + // Step 223: t25 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d + t25.Mul(t25, t26) + + // Step 232: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a00 + for s := 0; s < 9; s++ { + t25.Square(t25) + } + + // Step 233: t25 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27 + t25.Mul(t5, t25) + + // Step 239: t25 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89c0 + for s := 0; s < 6; s++ { + t25.Square(t25) + } + + // Step 240: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb + t24.Mul(t24, t25) + + // Step 244: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fb0 + for s := 0; s < 4; s++ { + t24.Square(t24) + } + + // Step 245: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf + t24.Mul(t12, t24) + + // Step 
253: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf00 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 254: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23 + t24.Mul(t13, t24) + + // Step 261: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9180 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 262: t24 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf9191 + t24.Mul(t23, t24) + + // Step 269: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c880 + for s := 0; s < 7; s++ { + t24.Square(t24) + } + + // Step 270: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c885 + t24.Mul(t7, t24) + + // Step 278: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88500 + for s := 0; s < 8; s++ { + t24.Square(t24) + } + + // Step 279: t24 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515 + t24.Mul(t15, t24) + + // Step 285: t24 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214540 + for s := 0; s < 6; s++ { + t24.Square(t24) + } + + // Step 286: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf232214551 + t23.Mul(t23, t24) + + // Step 293: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a880 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 294: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1 + t23.Mul(t3, t23) + + // Step 298: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b10 + for s := 0; s < 4; s++ { + t23.Square(t23) + } + + // Step 299: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d + t23.Mul(z, t23) + + // Step 306: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 307: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b + t23.Mul(t18, t23) + + // Step 315: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b00 + for s := 0; s < 8; 
s++ { + t23.Square(t23) + } + + // Step 316: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b33 + t23.Mul(t4, t23) + + // Step 325: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 326: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635 + t23.Mul(t16, t23) + + // Step 333: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331a80 + for s := 0; s < 7; s++ { + t23.Square(t23) + } + + // Step 334: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9 + t23.Mul(t22, t23) + + // Step 340: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 341: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73 + t23.Mul(t4, t23) + + // Step 347: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cc0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 348: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf1 + t23.Mul(t3, t23) + + // Step 357: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e200 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 358: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215 + t23.Mul(t15, t23) + + // Step 366: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21500 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 367: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e21523 + t23.Mul(t13, t23) + + // Step 373: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 374: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db + 
t23.Mul(t18, t23) + + // Step 382: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db00 + for s := 0; s < 8; s++ { + t23.Square(t23) + } + + // Step 383: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d + t23.Mul(t11, t23) + + // Step 386: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9e8 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 387: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb + t23.Mul(t1, t23) + + // Step 397: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac00 + for s := 0; s < 10; s++ { + t23.Square(t23) + } + + // Step 398: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b + t23.Mul(t20, t23) + + // Step 401: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6058 + for s := 0; s < 3; s++ { + t23.Square(t23) + } + + // Step 402: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059 + t23.Mul(&x, t23) + + // Step 414: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 415: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025 + t23.Mul(t14, t23) + + // Step 421: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640940 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 422: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975 + t23.Mul(t16, t23) + + // Step 434: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975000 + for s := 0; s < 12; s++ { + t23.Square(t23) + } + + // Step 435: t23 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027 + t23.Mul(t5, t23) + + // Step 441: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409c0 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 442: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f5 + t23.Mul(t16, t23) + + // Step 448: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d40 + for s := 0; s < 6; s++ { + t23.Square(t23) + } + + // Step 449: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55 + t23.Mul(t15, t23) + + // Step 460: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa800 + for s := 0; s < 11; s++ { + t23.Square(t23) + } + + // Step 461: t23 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829 + t23.Mul(t6, t23) + + // Step 466: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d550520 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 467: t23 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f + t23.Mul(t2, t23) + + // Step 472: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7e0 + for s := 0; s < 5; s++ { + t23.Square(t23) + } + + // Step 473: t23 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb + t23.Mul(t20, t23) + + // Step 482: t23 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd600 + for s := 0; s < 9; s++ { + t23.Square(t23) + } + + // Step 483: t22 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd639 + 
t22.Mul(t22, t23) + + // Step 490: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1c80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 491: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3 + t22.Mul(t4, t22) + + // Step 495: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb30 + for s := 0; s < 4; s++ { + t22.Square(t22) + } + + // Step 496: t22 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d + t22.Mul(z, t22) + + // Step 507: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e800 + for s := 0; s < 11; s++ { + t22.Square(t22) + } + + // Step 508: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e837 + t22.Mul(t21, t22) + + // Step 515: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b80 + for s := 0; s < 7; s++ { + t22.Square(t22) + } + + // Step 516: t22 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b99 + t22.Mul(t17, t22) + + // Step 525: t22 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373200 + for s := 0; s < 9; s++ { + t22.Square(t22) + } + + // Step 526: t21 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237 + t21.Mul(t21, t22) + + // Step 535: t21 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e00 + for s := 0; s < 9; s++ { + t21.Square(t21) + } + + // Step 536: t21 = 
x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e29 + t21.Mul(t6, t21) + + // Step 542: t21 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a40 + for s := 0; s < 6; s++ { + t21.Square(t21) + } + + // Step 543: t20 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b + t20.Mul(t20, t21) + + // Step 549: t20 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292c0 + for s := 0; s < 6; s++ { + t20.Square(t20) + } + + // Step 550: t20 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd + t20.Mul(z, t20) + + // Step 559: t20 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a00 + for s := 0; s < 9; s++ { + t20.Square(t20) + } + + // Step 560: t19 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2b + t19.Mul(t19, t20) + + // Step 565: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b34560 + for s := 0; s < 5; s++ { + t19.Square(t19) + } + + // Step 566: t19 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b + t19.Mul(t18, t19) + + // Step 572: t19 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15ec0 + for s := 0; s < 6; s++ { + t19.Square(t19) + } + + // Step 573: t18 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb + t18.Mul(t18, t19) + + // Step 579: t18 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 580: t17 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d9 + t17.Mul(t17, t18) + + // Step 589: t17 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db200 + for s := 0; s < 9; s++ { + t17.Square(t17) + } + + // Step 590: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db235 + t16.Mul(t16, t17) + + // Step 597: t16 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a80 + for s := 0; s < 7; s++ { + t16.Square(t16) + } + + // Step 598: t16 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a95 + t16.Mul(t15, t16) + + // Step 604: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a540 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 605: t16 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a543 + t16.Mul(t1, t16) + + // Step 616: t16 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1800 + for s := 0; s < 11; s++ { + t16.Square(t16) + } + + // Step 617: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a1815 + t15.Mul(t15, t16) + + // Step 621: t15 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18150 + for s := 0; s < 4; s++ { + t15.Square(t15) + } + + // Step 622: t15 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a18155 + t15.Mul(t7, t15) + + // Step 629: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa80 + for s := 0; s < 7; s++ { + t15.Square(t15) + } + + // Step 630: t15 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f + t15.Mul(t12, t15) + + // Step 640: t15 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c00 + for s := 0; s < 10; s++ { + t15.Square(t15) + } + + // Step 641: t14 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c25 + t14.Mul(t14, t15) + + // Step 647: t14 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0940 + for s := 0; s < 6; s++ { + t14.Square(t14) + } + + // Step 648: t13 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963 + t13.Mul(t13, t14) + + // Step 653: t13 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c60 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 654: t12 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f + t12.Mul(t12, t13) + + // Step 661: t12 = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f0963780 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 662: t12 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f + t12.Mul(t2, t12) + + // Step 669: t12 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcf80 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 670: t11 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbd + t11.Mul(t11, t12) + + // Step 673: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7de8 + for s := 0; s < 3; s++ { + t11.Square(t11) + } + + // Step 674: t11 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded + t11.Mul(t7, t11) + + // Step 684: t11 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b400 + for s := 0; s < 10; s++ { + t11.Square(t11) + } + + // Step 685: t10 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42d + t10.Mul(t10, t11) + + // Step 688: t10 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda168 + for s := 0; s < 3; s++ { + t10.Square(t10) + } + + // Step 689: t10 = 
x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d + t10.Mul(t7, t10) + + // Step 699: t10 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b400 + for s := 0; s < 10; s++ { + t10.Square(t10) + } + + // Step 700: t9 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f + t9.Mul(t9, t10) + + // Step 707: t9 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da1780 + for s := 0; s < 7; s++ { + t9.Square(t9) + } + + // Step 708: t8 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1 + t8.Mul(t8, t9) + + // Step 711: t8 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd08 + for s := 0; s < 3; s++ { + t8.Square(t8) + } + + // Step 712: t7 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d + t7.Mul(t7, t8) + + // Step 722: t7 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43400 + for s := 0; s < 10; s++ { + t7.Square(t7) + } + + // Step 723: t6 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429 + t6.Mul(t6, t7) + + // Step 731: t6 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342900 + for s := 0; s < 8; s++ { + t6.Square(t6) + } + + // Step 732: t5 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f4342927 + t5.Mul(t5, t6) + + // Step 735: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a14938 + for s := 0; s < 3; s++ { + t5.Square(t5) + } + + // Step 736: t5 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b + t5.Mul(t1, t5) + + // Step 750: t5 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec000 + for s := 0; s < 14; s++ { + t5.Square(t5) + } + + // Step 751: t4 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033 + t4.Mul(t4, t5) + + // Step 757: t4 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cc0 + for s := 0; s < 6; s++ { + t4.Square(t4) + } + + // Step 758: t3 = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1 + t3.Mul(t3, t4) + + // Step 763: t3 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e20 + for s := 0; s < 5; s++ { + t3.Square(t3) + } + + // Step 764: t2 = 
x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f + t2.Mul(t2, t3) + + // Step 768: t2 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f0 + for s := 0; s < 4; s++ { + t2.Square(t2) + } + + // Step 769: t1 = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f3 + t1.Mul(t1, t2) + + // Step 778: t1 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e600 + for s := 0; s < 9; s++ { + t1.Square(t1) + } + + // Step 779: t0 = x^0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f + t0.Mul(t0, t1) + + // Step 780: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7e + t0.Square(t0) + + // Step 781: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f + t0.Mul(&x, t0) + + // Step 789: t0 = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f00 + for s := 0; s < 8; s++ { + t0.Square(t0) + } + + // Step 790: z = x^0x1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d + z.Mul(z, t0) + + // Step 795: z = 
x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1f98fe1a0 + for s := 0; s < 5; s++ { + z.Square(z) + } + + // Step 796: z = x^0x3ddab6ed6ee62b8ab049f878d5a33d725e334beb0b3a27efc8c88515458e9b331ab9cf10a91b67ac0b204ba813eaa829fac72cf41b991b8a4b3457b6d91a950c0aa8f096379f7b42da17a1a1493b00cf1f98fe1a1 + z.Mul(&x, z) + + // Step 877: z = x^0x7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 + for s := 0; s < 81; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bw6-756/fp/element_fuzz.go b/ecc/bw6-756/fp/element_fuzz.go new file mode 100644 index 000000000..19d12cce9 --- /dev/null +++ b/ecc/bw6-756/fp/element_fuzz.go @@ -0,0 +1,200 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( // return codes for the go-fuzz driver (see Fuzz) + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof // short read: keep the limbs filled so far + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[11] %= qElement[11] // keep the most-significant limb below q's top limb + + if z.BiggerModulus() { // reduce by q if needed + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + z[4], b = bits.Sub64(z[4], qElement[4], b) + z[5], b = bits.Sub64(z[5], qElement[5], b) + z[6], b = bits.Sub64(z[6], qElement[6], b) + z[7], b = bits.Sub64(z[7], qElement[7], b) + z[8], b = bits.Sub64(z[8], qElement[8], b) + z[9], b = bits.Sub64(z[9], qElement[9], b) + z[10], b = bits.Sub64(z[10], qElement[10], b) + z[11], b = bits.Sub64(z[11], qElement[11], b) + } + + 
return +} + +func (z *Element) BiggerModulus() bool { // reports whether z (read as raw limbs) is >= the modulus q, comparing limbs from most to least significant + if z[11] > qElement[11] { + return true + } + if z[11] < qElement[11] { + return false + } + + if z[10] > qElement[10] { + return true + } + if z[10] < qElement[10] { + return false + } + + if z[9] > qElement[9] { + return true + } + if z[9] < qElement[9] { + return false + } + + if z[8] > qElement[8] { + return true + } + if z[8] < qElement[8] { + return false + } + + if z[7] > qElement[7] { + return true + } + if z[7] < qElement[7] { + return false + } + + if z[6] > qElement[6] { + return true + } + if z[6] < qElement[6] { + return false + } + + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/ecc/bw6-756/fp/element_mul_adx_amd64.s b/ecc/bw6-756/fp/element_mul_adx_amd64.s new file mode 100644 index 000000000..689a4e512 --- /dev/null +++ b/ecc/bw6-756/fp/element_mul_adx_amd64.s @@ -0,0 +1,2739 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $1 +DATA q<>+8(SB)/8, $0x33c7e63f86840000 +DATA q<>+16(SB)/8, $0xd0b685e868524ec0 +DATA q<>+24(SB)/8, $0x4302aa3c258de7de +DATA q<>+32(SB)/8, $0xe292cd15edb646a5 +DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646 +DATA q<>+48(SB)/8, $0xeb02c812ea04faaa +DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9 +DATA q<>+64(SB)/8, $0xfbf23221455163a6 +DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89 +DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf +DATA q<>+88(SB)/8, $0x000f76adbb5bb98a +GLOBL q<>(SB), (RODATA+NOPTR), $96 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0xffffffffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + MOVQ ra6, rb6; \ + SBBQ q<>+48(SB), ra6; \ + MOVQ ra7, rb7; \ + SBBQ q<>+56(SB), ra7; \ + MOVQ ra8, rb8; \ + SBBQ q<>+64(SB), ra8; \ + MOVQ ra9, rb9; \ + SBBQ q<>+72(SB), ra9; \ + MOVQ ra10, rb10; \ + SBBQ q<>+80(SB), ra10; \ + MOVQ ra11, rb11; \ + SBBQ q<>+88(SB), ra11; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + CMOVQCS rb6, ra6; \ + CMOVQCS rb7, ra7; \ + CMOVQCS rb8, ra8; \ + CMOVQCS rb9, ra9; \ + CMOVQCS rb10, ra10; \ + CMOVQCS rb11, ra11; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $96-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 
to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + MOVQ x+8(FP), AX + + // x[0] -> s0-8(SP) + // x[1] -> s1-16(SP) + // x[2] -> s2-24(SP) + // x[3] -> s3-32(SP) + // x[4] -> s4-40(SP) + // x[5] -> s5-48(SP) + // x[6] -> s6-56(SP) + // x[7] -> s7-64(SP) + // x[8] -> s8-72(SP) + // x[9] -> s9-80(SP) + // x[10] -> s10-88(SP) + // x[11] -> s11-96(SP) + MOVQ 0(AX), R14 + MOVQ 8(AX), R15 + MOVQ 16(AX), CX + MOVQ 24(AX), BX + MOVQ 32(AX), SI + MOVQ 40(AX), DI + MOVQ 48(AX), R8 + MOVQ 56(AX), R9 + MOVQ 64(AX), R10 + MOVQ 72(AX), R11 + MOVQ 80(AX), R12 + MOVQ 88(AX), R13 + MOVQ R14, s0-8(SP) + MOVQ R15, s1-16(SP) + MOVQ CX, s2-24(SP) + MOVQ BX, s3-32(SP) + MOVQ SI, s4-40(SP) + MOVQ DI, s5-48(SP) + MOVQ R8, s6-56(SP) + MOVQ R9, s7-64(SP) + MOVQ R10, s8-72(SP) + MOVQ R11, s9-80(SP) + MOVQ R12, s10-88(SP) + MOVQ R13, s11-96(SP) + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // t[6] -> R8 + // t[7] -> R9 + // t[8] -> R10 + // t[9] -> R11 + // t[10] -> R12 + // t[11] -> R13 + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 0(AX), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ s0-8(SP), R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ s1-16(SP), AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ s2-24(SP), AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ s3-32(SP), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ s4-40(SP), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ s5-48(SP), AX, R8 + ADOXQ AX, DI + + // (A,t[6]) := x[6]*y[0] + A + MULXQ s6-56(SP), AX, R9 + ADOXQ AX, R8 + + // (A,t[7]) := x[7]*y[0] + A + MULXQ s7-64(SP), AX, R10 + ADOXQ AX, R9 + + // (A,t[8]) := x[8]*y[0] + A + MULXQ s8-72(SP), AX, R11 + ADOXQ AX, R10 + + // (A,t[9]) := x[9]*y[0] + A + MULXQ s9-80(SP), AX, R12 + ADOXQ AX, R11 + + // (A,t[10]) := x[10]*y[0] + A + MULXQ s10-88(SP), AX, R13 + ADOXQ AX, R12 + + // (A,t[11]) := x[11]*y[0] + A + 
MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 8(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // 
(A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[1] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[1] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[1] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[1] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[1] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[1] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ 
q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 16(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[2] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[2] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[2] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[2] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[2] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[2] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + 
ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 24(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[3] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[3] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[3] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[3] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[3] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[3] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += 
carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 32(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ 
BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[4] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[4] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[4] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[4] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[4] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[4] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + 
// t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 40(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[5] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[5] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[5] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[5] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[5] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[5] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + 
m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 48(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[6] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[6] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[6] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[6] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[6] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[6] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[6] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[6] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[6] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[6] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[6] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[6] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, 
AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 56(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[7] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[7] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[7] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[7] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[7] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[7] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ 
AX, DI + + // (A,t[6]) := t[6] + x[6]*y[7] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[7] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[7] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[7] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[7] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[7] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, 
R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 64(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[8] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[8] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[8] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[8] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[8] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[8] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[8] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[8] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[8] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[8] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[8] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[8] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), 
AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 72(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[9] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[9] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[9] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[9] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[9] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[9] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[9] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[9] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[9] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[9] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[9] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[9] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + 
+ // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 80(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[10] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[10] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[10] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[10] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[10] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[10] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + 
x[6]*y[10] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[10] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[10] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[10] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[10] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[10] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // 
clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 88(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[11] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[11] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[11] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[11] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[11] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[11] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[11] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[11] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[11] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[11] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[11] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[11] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ 
AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET + +TEXT ·fromMont(SB), $96-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + MOVQ 48(DX), R8 + MOVQ 56(DX), R9 + MOVQ 64(DX), R10 + MOVQ 72(DX), R11 + MOVQ 80(DX), R12 + MOVQ 88(DX), R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + 
m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + 
ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, 
R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] 
+ C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + 
m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), 
AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, 
SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + 
ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET diff --git a/ecc/bw6-756/fp/element_mul_amd64.s 
b/ecc/bw6-756/fp/element_mul_amd64.s new file mode 100644 index 000000000..738f1a4b6 --- /dev/null +++ b/ecc/bw6-756/fp/element_mul_amd64.s @@ -0,0 +1,2759 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $1 +DATA q<>+8(SB)/8, $0x33c7e63f86840000 +DATA q<>+16(SB)/8, $0xd0b685e868524ec0 +DATA q<>+24(SB)/8, $0x4302aa3c258de7de +DATA q<>+32(SB)/8, $0xe292cd15edb646a5 +DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646 +DATA q<>+48(SB)/8, $0xeb02c812ea04faaa +DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9 +DATA q<>+64(SB)/8, $0xfbf23221455163a6 +DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89 +DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf +DATA q<>+88(SB)/8, $0x000f76adbb5bb98a +GLOBL q<>(SB), (RODATA+NOPTR), $96 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0xffffffffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + MOVQ ra6, rb6; \ + SBBQ q<>+48(SB), ra6; \ + MOVQ ra7, rb7; \ + SBBQ q<>+56(SB), ra7; \ + MOVQ ra8, rb8; \ + SBBQ q<>+64(SB), ra8; \ + 
MOVQ ra9, rb9; \ + SBBQ q<>+72(SB), ra9; \ + MOVQ ra10, rb10; \ + SBBQ q<>+80(SB), ra10; \ + MOVQ ra11, rb11; \ + SBBQ q<>+88(SB), ra11; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + CMOVQCS rb6, ra6; \ + CMOVQCS rb7, ra7; \ + CMOVQCS rb8, ra8; \ + CMOVQCS rb9, ra9; \ + CMOVQCS rb10, ra10; \ + CMOVQCS rb11, ra11; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $96-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), AX + + // x[0] -> s0-8(SP) + // x[1] -> s1-16(SP) + // x[2] -> s2-24(SP) + // x[3] -> s3-32(SP) + // x[4] -> s4-40(SP) + // x[5] -> s5-48(SP) + // x[6] -> s6-56(SP) + // x[7] -> s7-64(SP) + // x[8] -> s8-72(SP) + // x[9] -> s9-80(SP) + // x[10] -> s10-88(SP) + // x[11] -> s11-96(SP) + MOVQ 0(AX), R14 + MOVQ 8(AX), R15 + MOVQ 16(AX), CX + MOVQ 24(AX), BX + MOVQ 32(AX), SI + MOVQ 40(AX), DI + MOVQ 48(AX), R8 + MOVQ 56(AX), R9 + MOVQ 64(AX), R10 + MOVQ 72(AX), R11 + MOVQ 80(AX), R12 + MOVQ 88(AX), R13 + MOVQ R14, s0-8(SP) + MOVQ R15, s1-16(SP) + MOVQ CX, s2-24(SP) + MOVQ BX, s3-32(SP) + MOVQ SI, s4-40(SP) + MOVQ DI, s5-48(SP) + MOVQ R8, s6-56(SP) + MOVQ R9, s7-64(SP) + MOVQ R10, s8-72(SP) + MOVQ R11, s9-80(SP) + MOVQ R12, s10-88(SP) + MOVQ R13, s11-96(SP) + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // t[6] -> R8 + // t[7] -> R9 + // t[8] -> R10 + // t[9] -> R11 + // t[10] -> R12 + // t[11] -> R13 + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 0(AX), DX + + // (A,t[0]) 
:= x[0]*y[0] + A + MULXQ s0-8(SP), R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ s1-16(SP), AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ s2-24(SP), AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ s3-32(SP), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ s4-40(SP), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ s5-48(SP), AX, R8 + ADOXQ AX, DI + + // (A,t[6]) := x[6]*y[0] + A + MULXQ s6-56(SP), AX, R9 + ADOXQ AX, R8 + + // (A,t[7]) := x[7]*y[0] + A + MULXQ s7-64(SP), AX, R10 + ADOXQ AX, R9 + + // (A,t[8]) := x[8]*y[0] + A + MULXQ s8-72(SP), AX, R11 + ADOXQ AX, R10 + + // (A,t[9]) := x[9]*y[0] + A + MULXQ s9-80(SP), AX, R12 + ADOXQ AX, R11 + + // (A,t[10]) := x[10]*y[0] + A + MULXQ s10-88(SP), AX, R13 + ADOXQ AX, R12 + + // (A,t[11]) := x[11]*y[0] + A + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + 
ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 8(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[1] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[1] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[1] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[1] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[1] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[1] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 
+ + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 16(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[2] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[2] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[2] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[2] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[2] + 
A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[2] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 24(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 
s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[3] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[3] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[3] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[3] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[3] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[3] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + 
m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 32(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[4] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[4] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[4] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[4] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[4] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[4] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + 
ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 40(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[5] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[5] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[5] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[5] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[5] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), 
AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[5] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 48(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[6] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[6] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[6] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[6] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // 
(A,t[4]) := t[4] + x[4]*y[6] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[6] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[6] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[6] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[6] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[6] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[6] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[6] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ 
q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 56(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[7] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[7] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[7] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[7] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[7] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[7] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[7] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[7] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[7] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[7] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[7] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[7] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, 
BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 64(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[8] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[8] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[8] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[8] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[8] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[8] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[8] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[8] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[8] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[8] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[8] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // 
(A,t[11]) := t[11] + x[11]*y[8] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 72(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[9] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[9] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[9] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[9] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[9] + 
A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[9] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[9] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[9] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[9] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[9] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[9] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[9] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 
+ + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 80(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[10] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[10] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[10] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[10] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[10] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[10] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[10] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[10] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[10] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[10] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[10] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + x[11]*y[10] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // 
(C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // clear the flags + XORQ AX, AX + MOVQ y+16(FP), AX + MOVQ 88(AX), DX + + // (A,t[0]) := t[0] + x[0]*y[11] + A + MULXQ s0-8(SP), AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[11] + A + ADCXQ BP, R15 + MULXQ s1-16(SP), AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[11] + A + ADCXQ BP, CX + MULXQ s2-24(SP), AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[11] + A + ADCXQ BP, BX + MULXQ s3-32(SP), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[11] + A + ADCXQ BP, SI + MULXQ s4-40(SP), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[11] + A + ADCXQ BP, DI + MULXQ s5-48(SP), AX, BP + ADOXQ AX, DI + + // (A,t[6]) := t[6] + x[6]*y[11] + A + ADCXQ BP, R8 + MULXQ s6-56(SP), AX, BP + ADOXQ AX, R8 + + // (A,t[7]) := t[7] + x[7]*y[11] + A + ADCXQ BP, R9 + MULXQ s7-64(SP), AX, BP + ADOXQ AX, R9 + + // (A,t[8]) := t[8] + x[8]*y[11] + A + ADCXQ BP, R10 + MULXQ s8-72(SP), AX, BP + ADOXQ AX, R10 + + // (A,t[9]) := t[9] + x[9]*y[11] + A + ADCXQ BP, R11 + MULXQ s9-80(SP), AX, BP + ADOXQ AX, R11 + + // (A,t[10]) := t[10] + x[10]*y[11] + A + ADCXQ BP, R12 + MULXQ s10-88(SP), AX, BP + ADOXQ AX, R12 + + // (A,t[11]) := t[11] + 
x[11]*y[11] + A + ADCXQ BP, R13 + MULXQ s11-96(SP), AX, BP + ADOXQ AX, R13 + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + PUSHQ BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + POPQ BP + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + + // t[11] = C + A + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ BP, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ 
DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $96-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + MOVQ 48(DX), R8 + MOVQ 56(DX), R9 + MOVQ 64(DX), R10 + MOVQ 72(DX), R11 + MOVQ 80(DX), R12 + MOVQ 88(DX), R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + 
MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + 
MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, 
R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // 
(C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, 
AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + 
MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, 
R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + // (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // (C,t[5]) := t[6] + m*q[6] + C + ADCXQ R8, DI + MULXQ q<>+48(SB), AX, R8 + ADOXQ AX, DI + + // (C,t[6]) := t[7] + m*q[7] + C + ADCXQ R9, R8 + MULXQ q<>+56(SB), AX, R9 + ADOXQ AX, R8 + + // (C,t[7]) := t[8] + m*q[8] + C + ADCXQ R10, R9 + MULXQ q<>+64(SB), AX, R10 + ADOXQ AX, R9 + + 
// (C,t[8]) := t[9] + m*q[9] + C + ADCXQ R11, R10 + MULXQ q<>+72(SB), AX, R11 + ADOXQ AX, R10 + + // (C,t[9]) := t[10] + m*q[10] + C + ADCXQ R12, R11 + MULXQ q<>+80(SB), AX, R12 + ADOXQ AX, R11 + + // (C,t[10]) := t[11] + m*q[11] + C + ADCXQ R13, R12 + MULXQ q<>+88(SB), AX, R13 + ADOXQ AX, R12 + MOVQ $0, AX + ADCXQ AX, R13 + ADOXQ AX, R13 + + // reduce element(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) using temp registers (s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP),s11-96(SP)) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + MOVQ R8, 48(AX) + MOVQ R9, 56(AX) + MOVQ R10, 64(AX) + MOVQ R11, 72(AX) + MOVQ R12, 80(AX) + MOVQ R13, 88(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bw6-756/fp/element_ops_amd64.go b/ecc/bw6-756/fp/element_ops_amd64.go new file mode 100644 index 000000000..73a3711ec --- /dev/null +++ b/ecc/bw6-756/fp/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bw6-756/fp/element_ops_amd64.s b/ecc/bw6-756/fp/element_ops_amd64.s new file mode 100644 index 000000000..2fd5939cd --- /dev/null +++ b/ecc/bw6-756/fp/element_ops_amd64.s @@ -0,0 +1,746 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $1 +DATA q<>+8(SB)/8, $0x33c7e63f86840000 +DATA q<>+16(SB)/8, $0xd0b685e868524ec0 +DATA q<>+24(SB)/8, $0x4302aa3c258de7de +DATA q<>+32(SB)/8, $0xe292cd15edb646a5 +DATA q<>+40(SB)/8, $0x0a7eb1cb3d06e646 +DATA q<>+48(SB)/8, $0xeb02c812ea04faaa +DATA q<>+56(SB)/8, $0xccc6ae73c42a46d9 +DATA q<>+64(SB)/8, $0xfbf23221455163a6 +DATA q<>+72(SB)/8, $0x5c978cd2fac2ce89 +DATA q<>+80(SB)/8, $0xe2ac127e1e3568cf +DATA q<>+88(SB)/8, $0x000f76adbb5bb98a +GLOBL q<>(SB), (RODATA+NOPTR), $96 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0xffffffffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, ra6, ra7, ra8, ra9, ra10, ra11, rb0, rb1, rb2, rb3, rb4, rb5, rb6, rb7, rb8, rb9, rb10, rb11) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + MOVQ ra6, rb6; \ + SBBQ q<>+48(SB), ra6; \ + MOVQ ra7, rb7; \ + SBBQ q<>+56(SB), ra7; \ + MOVQ ra8, rb8; \ + SBBQ q<>+64(SB), ra8; \ + MOVQ ra9, rb9; \ + SBBQ q<>+72(SB), ra9; \ + MOVQ ra10, rb10; \ + SBBQ q<>+80(SB), ra10; \ + MOVQ ra11, rb11; \ + SBBQ q<>+88(SB), ra11; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + CMOVQCS rb6, ra6; \ + CMOVQCS rb7, ra7; \ + CMOVQCS rb8, ra8; \ + CMOVQCS rb9, ra9; \ + CMOVQCS rb10, ra10; \ + CMOVQCS rb11, ra11; \ + +// add(res, x, y *Element) +TEXT ·add(SB), $80-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ 48(AX), R10 + MOVQ 56(AX), R11 + MOVQ 64(AX), R12 + MOVQ 72(AX), R13 + MOVQ 80(AX), R14 + MOVQ 88(AX), R15 + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + 
ADCQ 32(DX), R8 + ADCQ 40(DX), R9 + ADCQ 48(DX), R10 + ADCQ 56(DX), R11 + ADCQ 64(DX), R12 + ADCQ 72(DX), R13 + ADCQ 80(DX), R14 + ADCQ 88(DX), R15 + + // reduce element(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) using temp registers (AX,DX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,AX,DX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + + MOVQ res+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + MOVQ R10, 48(AX) + MOVQ R11, 56(AX) + MOVQ R12, 64(AX) + MOVQ R13, 72(AX) + MOVQ R14, 80(AX) + MOVQ R15, 88(AX) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), R14 + MOVQ 0(R14), AX + MOVQ 8(R14), DX + MOVQ 16(R14), CX + MOVQ 24(R14), BX + MOVQ 32(R14), SI + MOVQ 40(R14), DI + MOVQ 48(R14), R8 + MOVQ 56(R14), R9 + MOVQ 64(R14), R10 + MOVQ 72(R14), R11 + MOVQ 80(R14), R12 + MOVQ 88(R14), R13 + MOVQ y+16(FP), R14 + SUBQ 0(R14), AX + SBBQ 8(R14), DX + SBBQ 16(R14), CX + SBBQ 24(R14), BX + SBBQ 32(R14), SI + SBBQ 40(R14), DI + SBBQ 48(R14), R8 + SBBQ 56(R14), R9 + SBBQ 64(R14), R10 + SBBQ 72(R14), R11 + SBBQ 80(R14), R12 + SBBQ 88(R14), R13 + JCC l1 + MOVQ $1, R15 + ADDQ R15, AX + MOVQ $0x33c7e63f86840000, R15 + ADCQ R15, DX + MOVQ $0xd0b685e868524ec0, R15 + ADCQ R15, CX + MOVQ $0x4302aa3c258de7de, R15 + ADCQ R15, BX + MOVQ $0xe292cd15edb646a5, R15 + ADCQ R15, SI + MOVQ $0x0a7eb1cb3d06e646, R15 + ADCQ R15, DI + MOVQ $0xeb02c812ea04faaa, R15 + ADCQ R15, R8 + MOVQ $0xccc6ae73c42a46d9, R15 + ADCQ R15, R9 + MOVQ $0xfbf23221455163a6, R15 + ADCQ R15, R10 + MOVQ $0x5c978cd2fac2ce89, R15 + ADCQ R15, R11 + MOVQ $0xe2ac127e1e3568cf, R15 + ADCQ R15, R12 + MOVQ $0x000f76adbb5bb98a, R15 + ADCQ R15, R13 + +l1: + MOVQ res+0(FP), R14 + MOVQ AX, 0(R14) + MOVQ DX, 8(R14) + MOVQ CX, 16(R14) + MOVQ BX, 24(R14) + MOVQ SI, 32(R14) + MOVQ 
DI, 40(R14) + MOVQ R8, 48(R14) + MOVQ R9, 56(R14) + MOVQ R10, 64(R14) + MOVQ R11, 72(R14) + MOVQ R12, 80(R14) + MOVQ R13, 88(R14) + RET + +// double(res, x *Element) +TEXT ·double(SB), $80-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,AX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,AX,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP)) + + MOVQ res+0(FP), R15 + MOVQ DX, 0(R15) + MOVQ CX, 8(R15) + MOVQ BX, 16(R15) + MOVQ SI, 24(R15) + MOVQ DI, 32(R15) + MOVQ R8, 40(R15) + MOVQ R9, 48(R15) + MOVQ R10, 56(R15) + MOVQ R11, 64(R15) + MOVQ R12, 72(R15) + MOVQ R13, 80(R15) + MOVQ R14, 88(R15) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), R15 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + ORQ R9, AX + ORQ R10, AX + ORQ R11, AX + ORQ R12, AX + ORQ R13, AX + ORQ R14, AX + TESTQ AX, AX + JEQ l2 + MOVQ $1, AX + SUBQ DX, AX + MOVQ AX, 0(R15) + MOVQ $0x33c7e63f86840000, AX + SBBQ CX, AX + MOVQ AX, 8(R15) + MOVQ $0xd0b685e868524ec0, AX + SBBQ BX, AX + MOVQ AX, 16(R15) + MOVQ $0x4302aa3c258de7de, AX + SBBQ SI, AX + MOVQ AX, 24(R15) + MOVQ $0xe292cd15edb646a5, AX + SBBQ 
DI, AX + MOVQ AX, 32(R15) + MOVQ $0x0a7eb1cb3d06e646, AX + SBBQ R8, AX + MOVQ AX, 40(R15) + MOVQ $0xeb02c812ea04faaa, AX + SBBQ R9, AX + MOVQ AX, 48(R15) + MOVQ $0xccc6ae73c42a46d9, AX + SBBQ R10, AX + MOVQ AX, 56(R15) + MOVQ $0xfbf23221455163a6, AX + SBBQ R11, AX + MOVQ AX, 64(R15) + MOVQ $0x5c978cd2fac2ce89, AX + SBBQ R12, AX + MOVQ AX, 72(R15) + MOVQ $0xe2ac127e1e3568cf, AX + SBBQ R13, AX + MOVQ AX, 80(R15) + MOVQ $0x000f76adbb5bb98a, AX + SBBQ R14, AX + MOVQ AX, 88(R15) + RET + +l2: + MOVQ AX, 0(R15) + MOVQ AX, 8(R15) + MOVQ AX, 16(R15) + MOVQ AX, 24(R15) + MOVQ AX, 32(R15) + MOVQ AX, 40(R15) + MOVQ AX, 48(R15) + MOVQ AX, 56(R15) + MOVQ AX, 64(R15) + MOVQ AX, 72(R15) + MOVQ AX, 80(R15) + MOVQ AX, 88(R15) + RET + +TEXT ·reduce(SB), $88-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), $88-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, 
R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), $88-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + 
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), $184-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // 
reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP),s17-144(SP),s18-152(SP),s19-160(SP),s20-168(SP),s21-176(SP),s22-184(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,s11-96(SP),s12-104(SP),s13-112(SP),s14-120(SP),s15-128(SP),s16-136(SP),s17-144(SP),s18-152(SP),s19-160(SP),s20-168(SP),s21-176(SP),s22-184(SP)) + + MOVQ DX, s11-96(SP) + MOVQ CX, s12-104(SP) + MOVQ BX, s13-112(SP) + MOVQ SI, s14-120(SP) + MOVQ DI, s15-128(SP) + MOVQ R8, s16-136(SP) + MOVQ R9, s17-144(SP) + MOVQ R10, s18-152(SP) + MOVQ R11, s19-160(SP) + MOVQ R12, s20-168(SP) + MOVQ R13, s21-176(SP) + MOVQ R14, s22-184(SP) + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + ADCQ R10, R10 + ADCQ R11, R11 + ADCQ R12, R12 + ADCQ R13, R13 + ADCQ R14, R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ s11-96(SP), DX + ADCQ s12-104(SP), CX + ADCQ s13-112(SP), BX + ADCQ s14-120(SP), SI + ADCQ s15-128(SP), DI + ADCQ s16-136(SP), R8 + ADCQ s17-144(SP), R9 + ADCQ s18-152(SP), R10 + ADCQ s19-160(SP), R11 
+ ADCQ s20-168(SP), R12 + ADCQ s21-176(SP), R13 + ADCQ s22-184(SP), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), $88-16 + MOVQ b+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + MOVQ a+0(FP), AX + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + ADCQ 48(AX), R9 + ADCQ 56(AX), R10 + ADCQ 64(AX), R11 + ADCQ 72(AX), R12 + ADCQ 80(AX), R13 + ADCQ 88(AX), R14 + MOVQ DX, R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + MOVQ R9, s5-48(SP) + MOVQ R10, s6-56(SP) + MOVQ R11, s7-64(SP) + 
MOVQ R12, s8-72(SP) + MOVQ R13, s9-80(SP) + MOVQ R14, s10-88(SP) + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ 48(AX), R9 + MOVQ 56(AX), R10 + MOVQ 64(AX), R11 + MOVQ 72(AX), R12 + MOVQ 80(AX), R13 + MOVQ 88(AX), R14 + MOVQ b+8(FP), AX + SUBQ 0(AX), DX + SBBQ 8(AX), CX + SBBQ 16(AX), BX + SBBQ 24(AX), SI + SBBQ 32(AX), DI + SBBQ 40(AX), R8 + SBBQ 48(AX), R9 + SBBQ 56(AX), R10 + SBBQ 64(AX), R11 + SBBQ 72(AX), R12 + SBBQ 80(AX), R13 + SBBQ 88(AX), R14 + JCC l3 + MOVQ $1, AX + ADDQ AX, DX + MOVQ $0x33c7e63f86840000, AX + ADCQ AX, CX + MOVQ $0xd0b685e868524ec0, AX + ADCQ AX, BX + MOVQ $0x4302aa3c258de7de, AX + ADCQ AX, SI + MOVQ $0xe292cd15edb646a5, AX + ADCQ AX, DI + MOVQ $0x0a7eb1cb3d06e646, AX + ADCQ AX, R8 + MOVQ $0xeb02c812ea04faaa, AX + ADCQ AX, R9 + MOVQ $0xccc6ae73c42a46d9, AX + ADCQ AX, R10 + MOVQ $0xfbf23221455163a6, AX + ADCQ AX, R11 + MOVQ $0x5c978cd2fac2ce89, AX + ADCQ AX, R12 + MOVQ $0xe2ac127e1e3568cf, AX + ADCQ AX, R13 + MOVQ $0x000f76adbb5bb98a, AX + ADCQ AX, R14 + +l3: + MOVQ b+8(FP), AX + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + MOVQ R15, DX + MOVQ s0-8(SP), CX + MOVQ s1-16(SP), BX + MOVQ s2-24(SP), SI + MOVQ s3-32(SP), DI + MOVQ s4-40(SP), R8 + MOVQ s5-48(SP), R9 + MOVQ s6-56(SP), R10 + MOVQ s7-64(SP), R11 + MOVQ s8-72(SP), R12 + MOVQ s9-80(SP), R13 + MOVQ s10-88(SP), R14 + + // reduce element(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP),s5-48(SP),s6-56(SP),s7-64(SP),s8-72(SP),s9-80(SP),s10-88(SP)) + + MOVQ a+0(FP), AX + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + 
MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + MOVQ R9, 48(AX) + MOVQ R10, 56(AX) + MOVQ R11, 64(AX) + MOVQ R12, 72(AX) + MOVQ R13, 80(AX) + MOVQ R14, 88(AX) + RET diff --git a/ecc/bw6-756/fp/element_ops_noasm.go b/ecc/bw6-756/fp/element_ops_noasm.go new file mode 100644 index 000000000..fec628918 --- /dev/null +++ b/ecc/bw6-756/fp/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. 
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bw6-756/fp/element_test.go b/ecc/bw6-756/fp/element_test.go new file mode 100644 index 000000000..bfcc1d7fc --- /dev/null +++ b/ecc/bw6-756/fp/element_test.go @@ -0,0 +1,2777 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 11214533042317621956, + 4418601975293183768, + 2233550636059863627, + 13772400071271951950, + 13010224617750716256, + 15582310590478290871, + 6301429202206019695, + 15624904615961126890, + 14411832617204527559, + 10495912060283172777, + 8432856701560321958, + 4166778949326216, + } + benchResElement.SetOne() + b.ResetTimer() + 
for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 11214533042317621956, + 4418601975293183768, + 2233550636059863627, + 13772400071271951950, + 13010224617750716256, + 15582310590478290871, + 6301429202206019695, + 15624904615961126890, + 14411832617204527559, + 10495912060283172777, + 8432856701560321958, + 4166778949326216, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 20 + nbFuzz = 100 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[11]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = 
append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[11]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + +func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + 
b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 
5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + 
Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && 
a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic 
impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric 
Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + 
parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000", 16) + const sqrtExponentElement = "1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match 
Exp(7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := 
ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c 
:= a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + 
s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[11] > qElement[11] { + return true + } + if z[11] < qElement[11] { + return false + } + + if z[10] > qElement[10] { + return true + } + if z[10] < qElement[10] { + return false + } + + if z[9] > qElement[9] { + return true + } + if z[9] < qElement[9] { + return false + } + + if z[8] > qElement[8] { + return true + } + if z[8] < qElement[8] { + return false + } + + if z[7] > qElement[7] { + return true + } + if z[7] < qElement[7] { + return false + } + + if z[6] > qElement[6] { + return true + } + if z[6] < qElement[6] { + return false + } + + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g 
testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[11] != ^uint64(0) { + g.element[11] %= (qElement[11] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[11] != ^uint64(0) { + g.element[11] %= (qElement[11] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[11] != ^uint64(0) { + g[11] %= (qElement[11] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[11] != ^uint64(0) { + g[11] %= (qElement[11] + 
1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], carry = bits.Add64(a[3], qElement[3], carry) + a[4], carry = bits.Add64(a[4], qElement[4], carry) + a[5], carry = bits.Add64(a[5], qElement[5], carry) + a[6], carry = bits.Add64(a[6], qElement[6], carry) + a[7], carry = bits.Add64(a[7], qElement[7], carry) + a[8], carry = bits.Add64(a[8], qElement[8], carry) + a[9], carry = bits.Add64(a[9], qElement[9], carry) + a[10], carry = bits.Add64(a[10], qElement[10], carry) + a[11], _ = bits.Add64(a[11], qElement[11], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + inversionCorrectionFactorWord6, + inversionCorrectionFactorWord7, + inversionCorrectionFactorWord8, + inversionCorrectionFactorWord9, + inversionCorrectionFactorWord10, + inversionCorrectionFactorWord11, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + 
if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. +func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + inversionCorrectionFactorWord6, + inversionCorrectionFactorWord7, + inversionCorrectionFactorWord8, + inversionCorrectionFactorWord9, + inversionCorrectionFactorWord10, + inversionCorrectionFactorWord11, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + 
x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } +} + +//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen +func TestUpdateFactorSubtraction(t *testing.T) { + for i := 0; i < 1000; i++ { + + f0, g0 := randomizeUpdateFactors() + f1, g1 := randomizeUpdateFactors() + + for f0-f1 > 1<<31 || f0-f1 <= -1<<31 { + f1 /= 2 + } + + for g0-g1 > 1<<31 || g0-g1 <= -1<<31 { + g1 /= 2 + } + + c0 := updateFactorsCompose(f0, g0) + c1 := updateFactorsCompose(f1, g1) + + cRes := c0 - c1 + fRes, gRes := updateFactorsDecompose(cRes) + + if fRes != f0-f1 || gRes != g0-g1 { + t.Error(i) + } + } +} + +func TestUpdateFactorsDouble(t *testing.T) { + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f > 1<<30 || f < (-1<<31+1)/2 { + f /= 2 + if g <= 1<<29 && g >= (-1<<31+1)/4 { + g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g + } + } + + if g > 1<<30 || g < (-1<<31+1)/2 { + g /= 2 + + if f <= 1<<29 && f >= (-1<<31+1)/4 { + f *= 2 //f was kept small on g's account. 
Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func 
TestUpdateFactorsRandomization(t *testing.T) { + var maxLen int + + //t.Log("|f| + |g| is not to exceed", 1 << 31) + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + lf, lg := abs64T32(f), abs64T32(g) + absSum := lf + lg + if absSum >= 1<<31 { + + if absSum == 1<<31 { + maxLen++ + } else { + t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum) + } + } + } + + if maxLen == 0 { + t.Error("max len not observed") + } else { + t.Log(maxLen, "maxLens observed") + } +} + +func randomizeUpdateFactor(absLimit uint32) int64 { + const maxSizeLikelihood = 10 + maxSize := mrand.Intn(maxSizeLikelihood) + + absLimit64 := int64(absLimit) + var f int64 + switch maxSize { + case 0: + f = absLimit64 + case 1: + f = -absLimit64 + default: + f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64 + } + + if f > 1<<31 { + return 1 << 31 + } else if f < -1<<31+1 { + return -1<<31 + 1 + } + + return f +} + +func abs64T32(f int64) uint32 { + if f >= 1<<32 || f < -1<<32 { + panic("f out of range") + } + + if f < 0 { + return uint32(-f) + } + return uint32(f) +} + +func randomizeUpdateFactors() (int64, int64) { + var f [2]int64 + b := mrand.Int() % 2 + + f[b] = randomizeUpdateFactor(1 << 31) + + //As per the paper, |f| + |g| \le 2³¹. 
+ f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b])) + + //Patching another edge case + if f[0]+f[1] == -1<<31 { + b = mrand.Int() % 2 + f[b]++ + } + + return f[0], f[1] +} + +func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) { + + var p1 big.Int + x.ToBigInt(&p1) + p1.Mul(&p1, big.NewInt(xC)) + + var p2 big.Int + y.ToBigInt(&p2) + p2.Mul(&p2, big.NewInt(yC)) + + p1.Add(&p1, &p2) + p1.Mod(&p1, Modulus()) + montReduce(&p1, &p1) + + var z Element + z.linearCombSosSigned(x, xC, y, yC) + z.assertMatchVeryBigInt(t, 0, &p1) +} + +func testBigNumWMul(t *testing.T, a *Element, c int64) { + var aHi uint64 + var aTimes Element + aHi = aTimes.mulWRegular(a, c) + + assertMulProduct(t, a, c, &aTimes, aHi) +} + +func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) { + var res Element + var xInt big.Int + var resInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + res.montReduceSigned(x, xHi) + montReduce(&resInt, &xInt) + res.assertMatchVeryBigInt(t, 0, &resInt) +} + +func updateFactorsCompose(f int64, g int64) int64 { + return f + g<<32 +} + +var rInv big.Int + +func montReduce(res *big.Int, x *big.Int) { + if rInv.BitLen() == 0 { // initialization + rInv.SetUint64(1) + rInv.Lsh(&rInv, Limbs*64) + rInv.ModInverse(&rInv, Modulus()) + } + res.Mul(x, &rInv) + res.Mod(res, Modulus()) +} + +func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) { + z.ToBigInt(i) + var upperWord big.Int + upperWord.SetUint64(xHi) + upperWord.Lsh(&upperWord, Limbs*64) + i.Add(&upperWord, i) +} + +func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) { + z.toVeryBigIntUnsigned(i, xHi) + if signBitSelector&xHi != 0 { + twosCompModulus := big.NewInt(1) + twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64) + i.Sub(i, twosCompModulus) + } +} + +func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int { + var xInt big.Int + x.ToBigInt(&xInt) + + xInt.Mul(&xInt, big.NewInt(c)) + + result.assertMatchVeryBigInt(t, 
resultHi, &xInt) + return xInt +} + +func assertMatch(t *testing.T, w []big.Word, a uint64, index int) { + + var wI big.Word + + if index < len(w) { + wI = w[index] + } + + const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize) + + a = a >> ((index * bits.UintSize) % 64) + a &= filter + + if uint64(wI) != a { + t.Error("Bignum mismatch: disagreement on word", index) + } +} + +func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) { + + var modulus big.Int + var aIntMod big.Int + modulus.SetInt64(1) + modulus.Lsh(&modulus, (Limbs+1)*64) + aIntMod.Mod(aInt, &modulus) + + words := aIntMod.Bits() + + const steps = 64 / bits.UintSize + for i := 0; i < Limbs*steps; i++ { + assertMatch(t, words, z[i/steps], i) + } + + for i := 0; i < steps; i++ { + assertMatch(t, words, aHi, Limbs*steps+i) + } +} + +func approximateRef(x *Element) uint64 { + + var asInt big.Int + x.ToBigInt(&asInt) + n := x.BitLen() + + if n <= 64 { + return asInt.Uint64() + } + + modulus := big.NewInt(1 << 31) + var lo big.Int + lo.Mod(&asInt, modulus) + + modulus.Lsh(modulus, uint(n-64)) + var hi big.Int + hi.Div(&asInt, modulus) + hi.Lsh(&hi, 31) + + hi.Add(&hi, &lo) + return hi.Uint64() +} diff --git a/ecc/bw6-756/fr/arith.go b/ecc/bw6-756/fr/arith.go new file mode 100644 index 000000000..83c9fd9ef --- /dev/null +++ b/ecc/bw6-756/fr/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// madd0 hi = a*b + c (discards lo bits)
func madd0(a, b, c uint64) (hi uint64) {
	var carry, lo uint64
	hi, lo = bits.Mul64(a, b)
	// only the carry out of lo+c can affect hi; lo itself is discarded
	_, carry = bits.Add64(lo, c, 0)
	hi, _ = bits.Add64(hi, 0, carry)
	return
}

// madd1 hi, lo = a*b + c
func madd1(a, b, c uint64) (hi uint64, lo uint64) {
	var carry uint64
	hi, lo = bits.Mul64(a, b)
	lo, carry = bits.Add64(lo, c, 0)
	hi, _ = bits.Add64(hi, 0, carry)
	return
}

// madd2 hi, lo = a*b + c + d
func madd2(a, b, c, d uint64) (hi uint64, lo uint64) {
	var carry uint64
	hi, lo = bits.Mul64(a, b)
	// c+d is formed first; its carry and the carry of lo+(c+d) both land in hi.
	// No overflow is possible: a*b + c + d < 2^128 for all uint64 inputs.
	c, carry = bits.Add64(c, d, 0)
	hi, _ = bits.Add64(hi, 0, carry)
	lo, carry = bits.Add64(lo, c, 0)
	hi, _ = bits.Add64(hi, 0, carry)
	return
}

// madd3 hi, lo = a*b + c + d, with e added into the high word,
// i.e. the 128-bit result is a*b + c + d + e·2⁶⁴ (carries out of hi are dropped).
// Used to fold the running high limb into the final limb of a CIOS round.
func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) {
	var carry uint64
	hi, lo = bits.Mul64(a, b)
	c, carry = bits.Add64(c, d, 0)
	hi, _ = bits.Add64(hi, 0, carry)
	lo, carry = bits.Add64(lo, c, 0)
	hi, _ = bits.Add64(hi, e, carry)
	return
}
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bw6-756/fr/asm_noadx.go b/ecc/bw6-756/fr/asm_noadx.go new file mode 100644 index 000000000..221beab93 --- /dev/null +++ b/ecc/bw6-756/fr/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bw6-756/fr/doc.go b/ecc/bw6-756/fr/doc.go new file mode 100644 index 000000000..215d19b3d --- /dev/null +++ b/ecc/bw6-756/fr/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+// Package fr contains field arithmetic operations for modulus = 0x3eeb04...000001.
+//
+// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication)
+//
+// The modulus is hardcoded in all the operations.
+//
+// Field elements are represented as an array, and assumed to be in Montgomery form in all methods:
+// 	type Element [6]uint64
+//
+// Example API signature
+// 	// Mul z = x * y mod q
+// 	func (z *Element) Mul(x, y *Element) *Element
+//
+// and can be used like so:
+// 	var a, b Element
+// 	a.SetUint64(2)
+// 	b.SetString("984896738")
+// 	a.Mul(a, b)
+// 	a.Sub(a, a)
+// 	a.Add(a, b)
+// 	a.Inv(a)
+// 	b.Exp(b, new(big.Int).SetUint64(42))
+//
+// Modulus
+// 	0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f76a822c00009948a20000000001 // base 16
+// 	605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 // base 10
+package fr
diff --git a/ecc/bw6-756/fr/element.go b/ecc/bw6-756/fr/element.go
new file mode 100644
index 000000000..afbec34c8
--- /dev/null
+++ b/ecc/bw6-756/fr/element.go
@@ -0,0 +1,1720 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 6 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +type Element [6]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 6 + +// Bits number bits needed to represent Element +const Bits = 378 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 11045256207009841153 +const qElementWord1 uint64 = 14886639130118979584 +const qElementWord2 uint64 = 10956628289047010687 +const qElementWord3 uint64 = 9513184293603517222 +const qElementWord4 uint64 = 6038022134869067682 +const qElementWord5 uint64 = 283357621510263184 + +var qElement = Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, +} + +// Used for Montgomery reduction. 
(qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 11045256207009841151 + +// rSquare +var rSquare = Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + z[4] = x[4] + z[5] = x[5] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + 
case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fr.Element from type " + reflect.TypeOf(i1).String()) + } +} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + z[4] = 0 + z[5] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 1481365419032838079 + z[1] = 10045892448872562649 + z[2] = 7242180086616818316 + z[3] = 8832319421896135475 + z[4] = 13356930855120736188 + z[5] = 28498675542444634 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 6 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[5] == x[5]) && (z[4] == x[4]) && (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[5] > _x[5] { + return 1 + } else if _z[5] < _x[5] { + return -1 + } + if _z[4] > _x[4] { + return 1 + } else if _z[4] < _x[4] { + return -1 + } + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 5522628103504920577, 0) + _, b = bits.Sub64(_z[1], 16666691601914265600, b) + _, b = bits.Sub64(_z[2], 5478314144523505343, b) + _, b = bits.Sub64(_z[3], 4756592146801758611, b) + _, b = bits.Sub64(_z[4], 3019011067434533841, b) + _, b = bits.Sub64(_z[5], 141678810755131592, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [48]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[4] = binary.BigEndian.Uint64(bytes[32:40]) + z[5] = binary.BigEndian.Uint64(bytes[40:48]) + z[5] %= 
283357621510263184 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + + return z, nil +} + +// One returns 1 (in montgommery form) +func One() Element { + var one Element + one.SetOne() + return one +} + +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 + + // z = z + q + z[0], carry = bits.Add64(z[0], 11045256207009841153, 0) + z[1], carry = bits.Add64(z[1], 14886639130118979584, carry) + z[2], carry = bits.Add64(z[2], 10956628289047010687, carry) + z[3], carry = bits.Add64(z[3], 9513184293603517222, carry) + z[4], carry = bits.Add64(z[4], 6038022134869067682, carry) + z[5], _ = bits.Add64(z[5], 283357621510263184, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] = z[3]>>1 | z[4]<<63 + z[4] = z[4]>>1 | z[5]<<63 + z[5] >>= 1 + +} + +// API with assembly impl + +// Mul z = x * y mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Mul(x, y *Element) *Element { + mul(z, x, y) + return z +} + +// Square z = x * x mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Square(x *Element) *Element { + mul(z, x, x) + return z +} + +// FromMont converts z in place (i.e. 
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func (z *Element) FromMont() *Element { + fromMont(z) + return z +} + +// Add z = x + y mod q +func (z *Element) Add(x, y *Element) *Element { + add(z, x, y) + return z +} + +// Double z = x + x mod q, aka Lsh 1 +func (z *Element) Double(x *Element) *Element { + double(z, x) + return z +} + +// Sub z = x - y mod q +func (z *Element) Sub(x, y *Element) *Element { + sub(z, x, y) + return z +} + +// Neg z = q - x +func (z *Element) Neg(x *Element) *Element { + neg(z, x) + return z +} + +// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms + +func _mulGeneric(z, x, y *Element) { + + var t [6]uint64 + var c [3]uint64 + { + // round 0 + v := x[0] + c[1], c[0] = bits.Mul64(v, y[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd1(v, y[1], c[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd1(v, y[2], c[1]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd1(v, y[3], c[1]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd1(v, y[4], c[1]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd1(v, y[5], c[1]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 1 + v := x[1] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, 
c[0], c[2], c[1]) + } + { + // round 2 + v := x[2] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 3 + v := x[3] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 4 + v := x[4] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 5 + v 
:= x[5] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], z[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], z[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], z[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], z[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + z[5], z[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [6]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 14886639130118979584, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 10956628289047010687, c2, c0) + c1, c0 = madd1(y, x[3], c1) + c2, t[2] = madd2(m, 9513184293603517222, c2, c0) + c1, c0 = madd1(y, x[4], c1) + c2, t[3] = madd2(m, 6038022134869067682, c2, c0) + c1, c0 = madd1(y, x[5], c1) + t[5], t[4] = madd3(m, 283357621510263184, c0, 
c2, c1) + } + { + // round 1 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 2 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 3 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 4 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 5 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, z[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, z[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, z[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, z[3] = madd2(m, 6038022134869067682, c2, t[4]) + z[5], z[4] = madd2(m, 283357621510263184, t[5], c2) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 
6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _fromMontGeneric(z *Element) { + // the following lines implement z = z * 1 + // with a modified CIOS montgomery multiplication + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 
11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _addGeneric(z, x, y *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], y[0], 0) + z[1], carry = bits.Add64(x[1], y[1], carry) + z[2], carry = 
bits.Add64(x[2], y[2], carry) + z[3], carry = bits.Add64(x[3], y[3], carry) + z[4], carry = bits.Add64(x[4], y[4], carry) + z[5], _ = bits.Add64(x[5], y[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _doubleGeneric(z, x *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], x[0], 0) + z[1], carry = bits.Add64(x[1], x[1], carry) + z[2], carry = bits.Add64(x[2], x[2], carry) + z[3], carry = bits.Add64(x[3], x[3], carry) + z[4], carry = bits.Add64(x[4], x[4], carry) + z[5], _ = bits.Add64(x[5], x[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = 
bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _subGeneric(z, x, y *Element) { + var b uint64 + z[0], b = bits.Sub64(x[0], y[0], 0) + z[1], b = bits.Sub64(x[1], y[1], b) + z[2], b = bits.Sub64(x[2], y[2], b) + z[3], b = bits.Sub64(x[3], y[3], b) + z[4], b = bits.Sub64(x[4], y[4], b) + z[5], b = bits.Sub64(x[5], y[5], b) + if b != 0 { + var c uint64 + z[0], c = bits.Add64(z[0], 11045256207009841153, 0) + z[1], c = bits.Add64(z[1], 14886639130118979584, c) + z[2], c = bits.Add64(z[2], 10956628289047010687, c) + z[3], c = bits.Add64(z[3], 9513184293603517222, c) + z[4], c = bits.Add64(z[4], 6038022134869067682, c) + z[5], _ = bits.Add64(z[5], 283357621510263184, c) + } +} + +func _negGeneric(z, x *Element) { + if x.IsZero() { + z.SetZero() + return + } + var borrow uint64 + z[0], borrow = bits.Sub64(11045256207009841153, x[0], 0) + z[1], borrow = bits.Sub64(14886639130118979584, x[1], borrow) + z[2], borrow = bits.Sub64(10956628289047010687, x[2], borrow) + z[3], borrow = bits.Sub64(9513184293603517222, x[3], borrow) + z[4], borrow = bits.Sub64(6038022134869067682, x[4], borrow) + z[5], _ = bits.Sub64(283357621510263184, x[5], borrow) +} + +func _reduceGeneric(z *Element) { + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func mulByConstant(z 
*Element, c uint8) { + switch c { + case 0: + z.SetZero() + return + case 1: + return + case 2: + z.Double(z) + return + case 3: + _z := *z + z.Double(z).Add(z, &_z) + case 5: + _z := *z + z.Double(z).Double(z).Add(z, &_z) + default: + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[5] != 0 { + return 320 + bits.Len64(z[5]) + } + if z[4] != 0 { + return 256 + bits.Len64(z[4]) + } + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + +// Exp z = x^exponent mod q +func (z *Element) Exp(x Element, exponent *big.Int) *Element { + var bZero big.Int + if exponent.Cmp(&bZero) == 0 { + return z.SetOne() + } + + z.Set(&x) + + for i := exponent.BitLen() - 2; i >= 0; i-- { + z.Square(z) + if exponent.Bit(i) == 1 { + z.Mul(z, &x) + } + } + + return z +} + +// ToMont converts z to Montgomery form +// sets and returns z = z * r² +func (z *Element) ToMont() *Element { + return z.Mul(z, &rSquare) +} + +// ToRegular returns z in regular form (doesn't mutate z) +func (z Element) ToRegular() 
Element { + return *z.FromMont() +} + +// String returns the decimal representation of z as generated by +// z.Text(10). +func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[40:48], z[0]) + binary.BigEndian.PutUint64(b[32:40], z[1]) + binary.BigEndian.PutUint64(b[24:32], z[2]) + binary.BigEndian.PutUint64(b[16:24], z[3]) + binary.BigEndian.PutUint64(b[8:16], z[4]) + binary.BigEndian.PutUint64(b[0:8], z[5]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. 
+func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[40:48], _z[0]) + binary.BigEndian.PutUint64(res[32:40], _z[1]) + binary.BigEndian.PutUint64(res[24:32], _z[2]) + binary.BigEndian.PutUint64(res[16:24], _z[3]) + binary.BigEndian.PutUint64(res[8:16], _z[4]) + binary.BigEndian.PutUint64(res[0:8], _z[5]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. +func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix 
determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
+func (z *Element) UnmarshalJSON(data []byte) error { + s := string(data) + if len(s) > Bits*3 { + return errors.New("value too large (max = Element.Bits * 3)") + } + + // we accept numbers and strings, remove leading and trailing quotes if any + if len(s) > 0 && s[0] == '"' { + s = s[1:] + } + if len(s) > 0 && s[len(s)-1] == '"' { + s = s[:len(s)-1] + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(s, 0); !ok { + return errors.New("can't parse into a big.Int: " + s) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return nil +} + +// Legendre returns the Legendre symbol of z (either +1, -1, or 0.) +func (z *Element) Legendre() int { + var l Element + // z^((q-1)/2) + l.expByLegendreExp(*z) + + if l.IsZero() { + return 0 + } + + // if l == 1 + if (l[5] == 28498675542444634) && (l[4] == 13356930855120736188) && (l[3] == 8832319421896135475) && (l[2] == 7242180086616818316) && (l[1] == 10045892448872562649) && (l[0] == 1481365419032838079) { + return 1 + } + return -1 +} + +// Sqrt z = √x mod q +// if the square root doesn't exist (x is not a square mod q) +// Sqrt leaves z unchanged and returns nil +func (z *Element) Sqrt(x *Element) *Element { + // q ≡ 1 (mod 4) + // see modSqrtTonelliShanks in math/big/int.go + // using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf + + var y, b, t, w Element + // w = x^((s-1)/2)) + w.expBySqrtExp(*x) + + // y = x^((s+1)/2)) = w * x + y.Mul(x, &w) + + // b = x^s = w * w * x = y * x + b.Mul(&w, &y) + + // g = nonResidue ^ s + var g = Element{ + 15655215628902554004, + 15894127656167592378, + 9702012166408397168, + 12335982559306940759, + 1313802173610541430, + 81629743607937133, + } + r := uint64(41) + + // compute legendre symbol + // t = x^((q-1)/2) = r-1 squaring of x^s + t = b + for i := uint64(0); i < r-1; i++ { + t.Square(&t) + } + if t.IsZero() { + return z.SetZero() + } + if !((t[5] 
== 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) { + // t != 1, we don't have a square root + return nil + } + for { + var m uint64 + t = b + + // for t != 1 + for !((t[5] == 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) { + t.Square(&t) + m++ + } + + if m == 0 { + return z.Set(&y) + } + // t = g^(2^(r-m-1)) mod q + ge := int(r - m - 1) + t = g + for ge > 0 { + t.Square(&t) + ge-- + } + + g.Square(&t) + y.Mul(&y, &t) + b.Mul(&b, &g) + r = m + } +} + +func max(a int, b int) int { + if a > b { + return a + } + return b +} + +func min(a int, b int) int { + if a < b { + return a + } + return b +} + +const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1) +const updateFactorIdentityMatrixRow0 = 1 +const updateFactorIdentityMatrixRow1 = 1 << 32 + +func updateFactorsDecompose(c int64) (int64, int64) { + c += updateFactorsConversionBias + const low32BitsFilter int64 = 0xFFFFFFFF + f := c&low32BitsFilter - 0x7FFFFFFF + g := c>>32&low32BitsFilter - 0x7FFFFFFF + return f, g +} + +const k = 32 // word size / 2 +const signBitSelector = uint64(1) << 63 +const approxLowBitsN = k - 1 +const approxHighBitsN = k + 1 +const inversionCorrectionFactorWord0 = 851295657643717122 +const inversionCorrectionFactorWord1 = 10857859049187504913 +const inversionCorrectionFactorWord2 = 7148604188520083019 +const inversionCorrectionFactorWord3 = 1138623559447261654 +const inversionCorrectionFactorWord4 = 1203095380280779597 +const inversionCorrectionFactorWord5 = 148579538565968037 + +const invIterationsN = 26 + +// Inverse z = x⁻¹ mod q +// Implements "Optimized Binary GCD for Modular Inversion" +// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf +func (z *Element) Inverse(x 
*Element) *Element { + if x.IsZero() { + z.SetZero() + return z + } + + a := *x + b := Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, + } // b := q + + u := Element{1} + + // Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v] + // c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1) + var c0, c1 int64 + + // Saved update factors to reduce the number of field multiplications + var pf0, pf1, pg0, pg1 int64 + + var i uint + + var v, s Element + + // Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations + // This also lets us get away with half as many updates to u,v + // To make this constant-time-ish, replace the condition with i < invIterationsN + for i = 0; i&1 == 1 || !a.IsZero(); i++ { + n := max(a.BitLen(), b.BitLen()) + aApprox, bApprox := approximate(&a, n), approximate(&b, n) + + // After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰ + // f0, g0, f1, g1 = 1, 0, 0, 1 + c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1 + + for j := 0; j < approxLowBitsN; j++ { + + if aApprox&1 == 0 { + aApprox /= 2 + } else { + s, borrow := bits.Sub64(aApprox, bApprox, 0) + if borrow == 1 { + s = bApprox - aApprox + bApprox = aApprox + c0, c1 = c1, c0 + } + + aApprox = s / 2 + c0 = c0 - c1 + + // Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹ + // |f₁| ≤ 2ʲ still + } + + c1 *= 2 + // |f₁| ≤ 2ʲ⁺¹ + } + + s = a + + var g0 int64 + // from this point on c0 aliases for f0 + c0, g0 = updateFactorsDecompose(c0) + aHi := a.linearCombNonModular(&s, c0, &b, g0) + if aHi&signBitSelector != 0 { + // if aHi < 0 + c0, g0 = -c0, -g0 + aHi = a.neg(&a, aHi) + } + // right-shift a by k-1 bits + a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN) + a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN) + a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN) + a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN) + a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << 
approxHighBitsN) + a[5] = (a[5] >> approxLowBitsN) | (aHi << approxHighBitsN) + + var f1 int64 + // from this point on c1 aliases for g0 + f1, c1 = updateFactorsDecompose(c1) + bHi := b.linearCombNonModular(&s, f1, &b, c1) + if bHi&signBitSelector != 0 { + // if bHi < 0 + f1, c1 = -f1, -c1 + bHi = b.neg(&b, bHi) + } + // right-shift b by k-1 bits + b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN) + b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN) + b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN) + b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN) + b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN) + b[5] = (b[5] >> approxLowBitsN) | (bHi << approxHighBitsN) + + if i&1 == 1 { + // Combine current update factors with previously stored ones + // [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₀] [pf₀, pg₀; pf₀, pg₀] + // We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1} + // Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹ + // Which leaves us with an extra bit for the sign + + // c0 aliases f0, c1 aliases g1 + c0, g0, f1, c1 = c0*pf0+g0*pf1, + c0*pg0+g0*pg1, + f1*pf0+c1*pf1, + f1*pg0+c1*pg1 + + s = u + u.linearCombSosSigned(&u, c0, &v, g0) + v.linearCombSosSigned(&s, f1, &v, c1) + + } else { + // Save update factors + pf0, pg0, pf1, pg1 = c0, g0, f1, c1 + } + } + + // For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻² + const pSq int64 = 1 << (2 * (k - 1)) + // If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly) + for ; i < invIterationsN; i += 2 { + v.mulWSigned(&v, pSq) + } + + z.Mul(&v, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + return z +} + +// approximate a big number x into a single 64 bit word using its uppermost 
and lowermost bits +// if x fits in a word as is, no approximation necessary +func approximate(x *Element, nBits int) uint64 { + + if nBits <= 64 { + return x[0] + } + + const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones + lo := mask & x[0] + + hiWordIndex := (nBits - 1) / 64 + + hiWordBitsAvailable := nBits - hiWordIndex*64 + hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN) + + mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1)) + hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable) + + mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1) + mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed + + return lo | mid | hi +} + +func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) { + hi := z.linearCombNonModular(x, xC, y, yC) + z.montReduceSigned(z, hi) +} + +// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. Last bit of xHi may be used as a sign bit +func (z *Element) montReduceSigned(x *Element, xHi uint64) { + + const signBitRemover = ^signBitSelector + neg := xHi&signBitSelector != 0 + // the SOS implementation requires that most significant bit is 0 + // Let X be xHi*r + x + // note that if X is negative we would have initially stored it as 2⁶⁴ r + X + xHi &= signBitRemover + // with this a negative X is now represented as 2⁶³ r + X + + var t [2*Limbs - 1]uint64 + var C uint64 + + m := x[0] * qInvNegLsw + + C = madd0(m, qElementWord0, x[0]) + C, t[1] = madd2(m, qElementWord1, x[1], C) + C, t[2] = madd2(m, qElementWord2, x[2], C) + C, t[3] = madd2(m, qElementWord3, x[3], C) + C, t[4] = madd2(m, qElementWord4, x[4], C) + C, t[5] = madd2(m, qElementWord5, x[5], C) + + // the high word of m * qElement[5] is at most 62 bits + // x[5] + C is at most 65 bits (high word at most 1 bit) + // Thus the resulting C will be at most 63 bits + t[6] = xHi + C + // xHi and C are 63 bits, therefore no overflow + + { + const i = 1 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, 
t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 2 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 3 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 4 + m = t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, t[i+1] = madd2(m, qElementWord1, t[i+1], C) + C, t[i+2] = madd2(m, qElementWord2, t[i+2], C) + C, t[i+3] = madd2(m, qElementWord3, t[i+3], C) + C, t[i+4] = madd2(m, qElementWord4, t[i+4], C) + C, t[i+5] = madd2(m, qElementWord5, t[i+5], C) + + t[i+Limbs] += C + } + { + const i = 5 + m := t[i] * qInvNegLsw + + C = madd0(m, qElementWord0, t[i+0]) + C, z[0] = madd2(m, qElementWord1, t[i+1], C) + C, z[1] = madd2(m, qElementWord2, t[i+2], C) + C, z[2] = madd2(m, qElementWord3, t[i+3], C) + C, z[3] = madd2(m, qElementWord4, t[i+4], C) + z[5], z[4] = madd2(m, qElementWord5, t[i+5], C) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] 
== 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + if neg { + // We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead + var b uint64 + z[0], b = bits.Sub64(z[0], signBitSelector, 0) + z[1], b = bits.Sub64(z[1], 0, b) + z[2], b = bits.Sub64(z[2], 0, b) + z[3], b = bits.Sub64(z[3], 0, b) + z[4], b = bits.Sub64(z[4], 0, b) + z[5], b = bits.Sub64(z[5], 0, b) + + // Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0 + if b != 0 { + // z[5] = -1 + // negative: add q + const neg1 = 0xFFFFFFFFFFFFFFFF + + b = 0 + z[0], b = bits.Add64(z[0], qElementWord0, b) + z[1], b = bits.Add64(z[1], qElementWord1, b) + z[2], b = bits.Add64(z[2], qElementWord2, b) + z[3], b = bits.Add64(z[3], qElementWord3, b) + z[4], b = bits.Add64(z[4], qElementWord4, b) + z[5], _ = bits.Add64(neg1, qElementWord5, b) + } + } +} + +// mulWSigned mul word signed (w/ montgomery reduction) +func (z *Element) mulWSigned(x *Element, y int64) { + m := y >> 63 + _mulWGeneric(z, x, uint64((y^m)-m)) + // multiply by abs(y) + if y < 0 { + z.Neg(z) + } +} + +func (z *Element) neg(x *Element, xHi uint64) uint64 { + var b uint64 + + z[0], b = bits.Sub64(0, x[0], 0) + z[1], b = bits.Sub64(0, x[1], b) + z[2], b = bits.Sub64(0, x[2], b) + z[3], b = bits.Sub64(0, x[3], b) + z[4], b = bits.Sub64(0, x[4], b) + z[5], b = bits.Sub64(0, x[5], b) + xHi, _ = bits.Sub64(0, xHi, b) + + return xHi +} + +// regular multiplication by one word regular (non montgomery) +// Fewer additions than the branch-free for positive y. 
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { + + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) + + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) + c, z[4] = madd1(x[4], w, c) + c, z[5] = madd1(x[5], w, c) + + if y < 0 { + c = z.neg(z, c) + } + + return c +} + +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { + + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w + + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 4 - 1 + + h[curI], s[curI] = bits.Mul64(x[4], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 5 % 2 + const prevI 
= 1 - curI + const iMinusOne = 5 - 1 + + h[curI], s[curI] = bits.Mul64(x[5], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + { + const curI = 6 % 2 + const prevI = 1 - curI + const iMinusOne = 5 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c + } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + z[4], carry = bits.Add64(z[4], yTimes[4], carry) + z[5], carry = bits.Add64(z[5], yTimes[5], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + + return yHi +} diff --git a/ecc/bw6-756/fr/element_exp.go b/ecc/bw6-756/fr/element_exp.go new file mode 100644 index 000000000..7ac8671aa --- /dev/null +++ b/ecc/bw6-756/fr/element_exp.go @@ -0,0 +1,1040 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// expBySqrtExp is equivalent to z.Exp(x, fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) << 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // return ((_101001 + i386) << 6 + _101) << 3 + // + // 
Operations: 330 squares 67 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = 
x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; 
s++ { + t15.Square(t15) + } + + // Step 150: t15 = x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } + + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 
303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 397: z = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 + for s := 0; s < 3; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) 
<< 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // i399 = ((_101001 + i386) << 6 + _101) << 4 + 1 + // return i399 << 40 + // + // Operations: 371 squares 68 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + 
for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 150: t15 = 
x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } 
+ + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // 
Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 398: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca450 + for s := 0; s < 4; s++ { + z.Square(z) + } + + // Step 399: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca451 + z.Mul(&x, z) + + // Step 439: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000 + for s := 0; s < 40; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bw6-756/fr/element_fuzz.go b/ecc/bw6-756/fr/element_fuzz.go new file mode 100644 index 000000000..9d00610dd --- /dev/null +++ b/ecc/bw6-756/fr/element_fuzz.go @@ -0,0 +1,152 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[5] %= qElement[5] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + z[4], b = bits.Sub64(z[4], qElement[4], b) + z[5], b = bits.Sub64(z[5], qElement[5], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + 
if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/ecc/bw6-756/fr/element_mul_adx_amd64.s b/ecc/bw6-756/fr/element_mul_adx_amd64.s new file mode 100644 index 000000000..a6f902c36 --- /dev/null +++ b/ecc/bw6-756/fr/element_mul_adx_amd64.s @@ -0,0 +1,836 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), NOSPLIT, $0-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 
32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + 
ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + 
MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + 
// (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp 
registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +TEXT ·fromMont(SB), NOSPLIT, $0-8 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ 
DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + 
ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET diff --git a/ecc/bw6-756/fr/element_mul_amd64.s b/ecc/bw6-756/fr/element_mul_amd64.s new file mode 100644 index 000000000..171a75360 --- /dev/null +++ b/ecc/bw6-756/fr/element_mul_amd64.s @@ -0,0 +1,858 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $24-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + 
A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + 
ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + 
// (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), 
DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ 
q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $8-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 
+ ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ 
R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bw6-756/fr/element_ops_amd64.go b/ecc/bw6-756/fr/element_ops_amd64.go new file mode 100644 index 000000000..78022b3e6 --- /dev/null +++ b/ecc/bw6-756/fr/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bw6-756/fr/element_ops_amd64.s b/ecc/bw6-756/fr/element_ops_amd64.s new file mode 100644 index 000000000..97da07d77 --- /dev/null +++ b/ecc/bw6-756/fr/element_ops_amd64.s @@ -0,0 +1,452 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// add(res, x, y *Element) +TEXT ·add(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 40(DX), R9 + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ res+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + XORQ R9, R9 + MOVQ x+8(FP), R8 + MOVQ 0(R8), AX + MOVQ 8(R8), DX + MOVQ 16(R8), CX + MOVQ 24(R8), BX + MOVQ 32(R8), SI + MOVQ 40(R8), DI + MOVQ y+16(FP), R8 + SUBQ 0(R8), AX + SBBQ 8(R8), DX + SBBQ 16(R8), CX + SBBQ 24(R8), BX + SBBQ 32(R8), SI + SBBQ 40(R8), DI + MOVQ $0x9948a20000000001, R10 + MOVQ $0xce97f76a822c0000, R11 + MOVQ $0x980dc360d0a49d7f, R12 + MOVQ $0x84059eb647102326, R13 + MOVQ $0x53cb5d240ed107a2, R14 + MOVQ 
$0x03eeb0416684d190, R15 + CMOVQCC R9, R10 + CMOVQCC R9, R11 + CMOVQCC R9, R12 + CMOVQCC R9, R13 + CMOVQCC R9, R14 + CMOVQCC R9, R15 + ADDQ R10, AX + ADCQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + ADCQ R15, DI + MOVQ res+0(FP), R8 + MOVQ AX, 0(R8) + MOVQ DX, 8(R8) + MOVQ CX, 16(R8) + MOVQ BX, 24(R8) + MOVQ SI, 32(R8) + MOVQ DI, 40(R8) + RET + +// double(res, x *Element) +TEXT ·double(SB), NOSPLIT, $0-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ res+0(FP), R15 + MOVQ DX, 0(R15) + MOVQ CX, 8(R15) + MOVQ BX, 16(R15) + MOVQ SI, 24(R15) + MOVQ DI, 32(R15) + MOVQ R8, 40(R15) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), R9 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + TESTQ AX, AX + JEQ l1 + MOVQ $0x9948a20000000001, R10 + SUBQ DX, R10 + MOVQ R10, 0(R9) + MOVQ $0xce97f76a822c0000, R10 + SBBQ CX, R10 + MOVQ R10, 8(R9) + MOVQ $0x980dc360d0a49d7f, R10 + SBBQ BX, R10 + MOVQ R10, 16(R9) + MOVQ $0x84059eb647102326, R10 + SBBQ SI, R10 + MOVQ R10, 24(R9) + MOVQ $0x53cb5d240ed107a2, R10 + SBBQ DI, R10 + MOVQ R10, 32(R9) + MOVQ $0x03eeb0416684d190, R10 + SBBQ R8, R10 + MOVQ R10, 40(R9) + RET + +l1: + MOVQ AX, 0(R9) + MOVQ AX, 8(R9) + MOVQ AX, 16(R9) + MOVQ AX, 24(R9) + MOVQ AX, 32(R9) + MOVQ AX, 40(R9) + RET + +TEXT ·reduce(SB), NOSPLIT, $0-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + 
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12) + REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), $40-8 + MOVQ x+0(FP), AX + MOVQ 
0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + + MOVQ DX, R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ R15, DX + ADCQ s0-8(SP), CX + ADCQ s1-16(SP), BX + ADCQ s2-24(SP), SI + ADCQ s3-32(SP), DI + ADCQ s4-40(SP), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), $48-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ CX, R10 + MOVQ BX, R11 + MOVQ SI, R12 + MOVQ DI, R13 + MOVQ R8, R14 + MOVQ R9, R15 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 
40(DX), R9 + SUBQ 0(DX), R10 + SBBQ 8(DX), R11 + SBBQ 16(DX), R12 + SBBQ 24(DX), R13 + SBBQ 32(DX), R14 + SBBQ 40(DX), R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + MOVQ R9, s5-48(SP) + MOVQ $0x9948a20000000001, CX + MOVQ $0xce97f76a822c0000, BX + MOVQ $0x980dc360d0a49d7f, SI + MOVQ $0x84059eb647102326, DI + MOVQ $0x53cb5d240ed107a2, R8 + MOVQ $0x03eeb0416684d190, R9 + CMOVQCC AX, CX + CMOVQCC AX, BX + CMOVQCC AX, SI + CMOVQCC AX, DI + CMOVQCC AX, R8 + CMOVQCC AX, R9 + ADDQ CX, R10 + ADCQ BX, R11 + ADCQ SI, R12 + ADCQ DI, R13 + ADCQ R8, R14 + ADCQ R9, R15 + MOVQ s0-8(SP), CX + MOVQ s1-16(SP), BX + MOVQ s2-24(SP), SI + MOVQ s3-32(SP), DI + MOVQ s4-40(SP), R8 + MOVQ s5-48(SP), R9 + MOVQ R10, 0(DX) + MOVQ R11, 8(DX) + MOVQ R12, 16(DX) + MOVQ R13, 24(DX) + MOVQ R14, 32(DX) + MOVQ R15, 40(DX) + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET diff --git a/ecc/bw6-756/fr/element_ops_noasm.go b/ecc/bw6-756/fr/element_ops_noasm.go new file mode 100644 index 000000000..ec1fac18d --- /dev/null +++ b/ecc/bw6-756/fr/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bw6-756/fr/element_test.go b/ecc/bw6-756/fr/element_test.go new file mode 100644 index 000000000..4b3fb2975 --- /dev/null +++ b/ecc/bw6-756/fr/element_test.go @@ -0,0 +1,2681 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement.SetOne() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 
13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 200 + nbFuzz = 1000 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[5]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[5]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + 
+func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := 
gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a 
testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx 
= false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, 
&b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + 
properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000", 16) + const sqrtExponentElement = "fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match Exp(1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + 
c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with 
int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := 
a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + 
err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + 
var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], carry = bits.Add64(a[3], qElement[3], carry) + a[4], carry = bits.Add64(a[4], qElement[4], carry) + a[5], _ = bits.Add64(a[5], qElement[5], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. 
Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. 
+func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } 
+} + +//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen +func TestUpdateFactorSubtraction(t *testing.T) { + for i := 0; i < 1000; i++ { + + f0, g0 := randomizeUpdateFactors() + f1, g1 := randomizeUpdateFactors() + + for f0-f1 > 1<<31 || f0-f1 <= -1<<31 { + f1 /= 2 + } + + for g0-g1 > 1<<31 || g0-g1 <= -1<<31 { + g1 /= 2 + } + + c0 := updateFactorsCompose(f0, g0) + c1 := updateFactorsCompose(f1, g1) + + cRes := c0 - c1 + fRes, gRes := updateFactorsDecompose(cRes) + + if fRes != f0-f1 || gRes != g0-g1 { + t.Error(i) + } + } +} + +func TestUpdateFactorsDouble(t *testing.T) { + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f > 1<<30 || f < (-1<<31+1)/2 { + f /= 2 + if g <= 1<<29 && g >= (-1<<31+1)/4 { + g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g + } + } + + if g > 1<<30 || g < (-1<<31+1)/2 { + g /= 2 + + if f <= 1<<29 && f >= (-1<<31+1)/4 { + f *= 2 //f was kept small on g's account. 
Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func 
TestUpdateFactorsRandomization(t *testing.T) { + var maxLen int + + //t.Log("|f| + |g| is not to exceed", 1 << 31) + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + lf, lg := abs64T32(f), abs64T32(g) + absSum := lf + lg + if absSum >= 1<<31 { + + if absSum == 1<<31 { + maxLen++ + } else { + t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum) + } + } + } + + if maxLen == 0 { + t.Error("max len not observed") + } else { + t.Log(maxLen, "maxLens observed") + } +} + +func randomizeUpdateFactor(absLimit uint32) int64 { + const maxSizeLikelihood = 10 + maxSize := mrand.Intn(maxSizeLikelihood) + + absLimit64 := int64(absLimit) + var f int64 + switch maxSize { + case 0: + f = absLimit64 + case 1: + f = -absLimit64 + default: + f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64 + } + + if f > 1<<31 { + return 1 << 31 + } else if f < -1<<31+1 { + return -1<<31 + 1 + } + + return f +} + +func abs64T32(f int64) uint32 { + if f >= 1<<32 || f < -1<<32 { + panic("f out of range") + } + + if f < 0 { + return uint32(-f) + } + return uint32(f) +} + +func randomizeUpdateFactors() (int64, int64) { + var f [2]int64 + b := mrand.Int() % 2 + + f[b] = randomizeUpdateFactor(1 << 31) + + //As per the paper, |f| + |g| \le 2³¹. 
+ f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b])) + + //Patching another edge case + if f[0]+f[1] == -1<<31 { + b = mrand.Int() % 2 + f[b]++ + } + + return f[0], f[1] +} + +func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) { + + var p1 big.Int + x.ToBigInt(&p1) + p1.Mul(&p1, big.NewInt(xC)) + + var p2 big.Int + y.ToBigInt(&p2) + p2.Mul(&p2, big.NewInt(yC)) + + p1.Add(&p1, &p2) + p1.Mod(&p1, Modulus()) + montReduce(&p1, &p1) + + var z Element + z.linearCombSosSigned(x, xC, y, yC) + z.assertMatchVeryBigInt(t, 0, &p1) +} + +func testBigNumWMul(t *testing.T, a *Element, c int64) { + var aHi uint64 + var aTimes Element + aHi = aTimes.mulWRegular(a, c) + + assertMulProduct(t, a, c, &aTimes, aHi) +} + +func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) { + var res Element + var xInt big.Int + var resInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + res.montReduceSigned(x, xHi) + montReduce(&resInt, &xInt) + res.assertMatchVeryBigInt(t, 0, &resInt) +} + +func updateFactorsCompose(f int64, g int64) int64 { + return f + g<<32 +} + +var rInv big.Int + +func montReduce(res *big.Int, x *big.Int) { + if rInv.BitLen() == 0 { // initialization + rInv.SetUint64(1) + rInv.Lsh(&rInv, Limbs*64) + rInv.ModInverse(&rInv, Modulus()) + } + res.Mul(x, &rInv) + res.Mod(res, Modulus()) +} + +func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) { + z.ToBigInt(i) + var upperWord big.Int + upperWord.SetUint64(xHi) + upperWord.Lsh(&upperWord, Limbs*64) + i.Add(&upperWord, i) +} + +func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) { + z.toVeryBigIntUnsigned(i, xHi) + if signBitSelector&xHi != 0 { + twosCompModulus := big.NewInt(1) + twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64) + i.Sub(i, twosCompModulus) + } +} + +func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int { + var xInt big.Int + x.ToBigInt(&xInt) + + xInt.Mul(&xInt, big.NewInt(c)) + + result.assertMatchVeryBigInt(t, 
resultHi, &xInt) + return xInt +} + +func assertMatch(t *testing.T, w []big.Word, a uint64, index int) { + + var wI big.Word + + if index < len(w) { + wI = w[index] + } + + const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize) + + a = a >> ((index * bits.UintSize) % 64) + a &= filter + + if uint64(wI) != a { + t.Error("Bignum mismatch: disagreement on word", index) + } +} + +func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) { + + var modulus big.Int + var aIntMod big.Int + modulus.SetInt64(1) + modulus.Lsh(&modulus, (Limbs+1)*64) + aIntMod.Mod(aInt, &modulus) + + words := aIntMod.Bits() + + const steps = 64 / bits.UintSize + for i := 0; i < Limbs*steps; i++ { + assertMatch(t, words, z[i/steps], i) + } + + for i := 0; i < steps; i++ { + assertMatch(t, words, aHi, Limbs*steps+i) + } +} + +func approximateRef(x *Element) uint64 { + + var asInt big.Int + x.ToBigInt(&asInt) + n := x.BitLen() + + if n <= 64 { + return asInt.Uint64() + } + + modulus := big.NewInt(1 << 31) + var lo big.Int + lo.Mod(&asInt, modulus) + + modulus.Lsh(modulus, uint(n-64)) + var hi big.Int + hi.Div(&asInt, modulus) + hi.Lsh(&hi, 31) + + hi.Add(&hi, &lo) + return hi.Uint64() +} diff --git a/ecc/bw6-756/fr/fft/doc.go b/ecc/bw6-756/fr/fft/doc.go new file mode 100644 index 000000000..3c35170e8 --- /dev/null +++ b/ecc/bw6-756/fr/fft/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fft provides in-place discrete Fourier transform. +package fft diff --git a/ecc/bw6-756/fr/fft/domain.go b/ecc/bw6-756/fr/fft/domain.go new file mode 100644 index 000000000..dc31f8246 --- /dev/null +++ b/ecc/bw6-756/fr/fft/domain.go @@ -0,0 +1,300 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "fmt" + "io" + "math/big" + "math/bits" + "runtime" + "sync" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + + curve "github.com/consensys/gnark-crypto/ecc/bw6-756" + + "github.com/consensys/gnark-crypto/ecc" +) + +// Domain with a power of 2 cardinality +// compute a field element of order 2x and store it in FinerGenerator +// all other values can be derived from x, GeneratorSqrt +type Domain struct { + Cardinality uint64 + Depth uint64 + PrecomputeReversedTable uint64 // uint64 so it is recognized by the decoder from gnark-crypto + CardinalityInv fr.Element + Generator fr.Element + GeneratorInv fr.Element + FinerGenerator fr.Element + FinerGeneratorInv fr.Element + + // the following slices are not serialized and are (re)computed through domain.preComputeTwiddles() + + // Twiddles factor for the FFT using Generator for each stage of the recursive FFT + Twiddles [][]fr.Element + + // Twiddles factor for the FFT using GeneratorInv for each stage of the 
recursive FFT + TwiddlesInv [][]fr.Element + + // we precompute these mostly to avoid the memory intensive bit reverse permutation in the groth16.Prover + + // CosetTable[i][j] = domain.Generator(i-th)Sqrt ^ j + // CosetTable = fft.BitReverse(CosetTable) + CosetTable [][]fr.Element + CosetTableReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain + + // CosetTable[i][j] = domain.Generator(i-th)SqrtInv ^ j + // CosetTableInv = fft.BitReverse(CosetTableInv) + CosetTableInv [][]fr.Element + CosetTableInvReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain +} + +// NewDomain returns a subgroup with a power of 2 cardinality +// cardinality >= m +// If depth>0, the Domain will also store a primitive (2**depth)*m root +// of 1, with associated precomputed data. This allows to perform shifted +// FFT/FFTInv. +// If precomputeReversedCosetTable is set, the bit reversed cosetTable/cosetTableInv are precomputed. +// +// example: +// -------- +// +// * NewDomain(m, 0, false) outputs a new domain to perform the fft on Z/mZ. +// * NewDomain(m, 2, false) outputs a new domain to perform fft on Z/mZ, plus a primitive +// 2**2*m=4m-th root of 1 and associated data to compute fft/fftinv on the cosets of +// (Z/4mZ)/(Z/mZ). 
+func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { + + // generator of the largest 2-adic subgroup + var rootOfUnity fr.Element + + rootOfUnity.SetString("199251335866470442271346949249090720992237796757894062992204115206570647302191425225605716521843542790404563904580") + const maxOrderRoot uint64 = 41 + + domain := &Domain{} + x := ecc.NextPowerOfTwo(m) + domain.Cardinality = uint64(x) + domain.Depth = depth + if precomputeReversedTable { + domain.PrecomputeReversedTable = 1 + } + + // find generator for Z/2^(log(m))Z and Z/2^(log(m)+cosets)Z + logx := uint64(bits.TrailingZeros64(x)) + if logx > maxOrderRoot { + panic(fmt.Sprintf("m (%d) is too big: the required root of unity does not exist", m)) + } + logGen := logx + depth + if logGen > maxOrderRoot { + panic("log(m) + cosets is too big: the required root of unity does not exist") + } + + expo := uint64(1 << (maxOrderRoot - logGen)) + bExpo := new(big.Int).SetUint64(expo) + domain.FinerGenerator.Exp(rootOfUnity, bExpo) + domain.FinerGeneratorInv.Inverse(&domain.FinerGenerator) + + // Generator = FinerGenerator^2 has order x + expo = uint64(1 << (maxOrderRoot - logx)) + bExpo.SetUint64(expo) + domain.Generator.Exp(rootOfUnity, bExpo) // order x + domain.GeneratorInv.Inverse(&domain.Generator) + domain.CardinalityInv.SetUint64(uint64(x)).Inverse(&domain.CardinalityInv) + + // twiddle factors + domain.preComputeTwiddles() + + // store the bit reversed coset tables if needed + if depth > 0 && precomputeReversedTable { + domain.reverseCosetTables() + } + + return domain +} + +func (d *Domain) reverseCosetTables() { + nbCosets := (1 << d.Depth) - 1 + d.CosetTableReversed = make([][]fr.Element, nbCosets) + d.CosetTableInvReversed = make([][]fr.Element, nbCosets) + for i := 0; i < nbCosets; i++ { + d.CosetTableReversed[i] = make([]fr.Element, d.Cardinality) + d.CosetTableInvReversed[i] = make([]fr.Element, d.Cardinality) + copy(d.CosetTableReversed[i], d.CosetTable[i]) + 
copy(d.CosetTableInvReversed[i], d.CosetTableInv[i]) + BitReverse(d.CosetTableReversed[i]) + BitReverse(d.CosetTableInvReversed[i]) + } +} + +func (d *Domain) preComputeTwiddles() { + + // nb fft stages + nbStages := uint64(bits.TrailingZeros64(d.Cardinality)) + nbCosets := (1 << d.Depth) - 1 + + d.Twiddles = make([][]fr.Element, nbStages) + d.TwiddlesInv = make([][]fr.Element, nbStages) + d.CosetTable = make([][]fr.Element, nbCosets) + d.CosetTableInv = make([][]fr.Element, nbCosets) + for i := 0; i < nbCosets; i++ { + d.CosetTable[i] = make([]fr.Element, d.Cardinality) + d.CosetTableInv[i] = make([]fr.Element, d.Cardinality) + } + + var wg sync.WaitGroup + + // for each fft stage, we pre compute the twiddle factors + twiddles := func(t [][]fr.Element, omega fr.Element) { + for i := uint64(0); i < nbStages; i++ { + t[i] = make([]fr.Element, 1+(1<<(nbStages-i-1))) + var w fr.Element + if i == 0 { + w = omega + } else { + w = t[i-1][2] + } + t[i][0] = fr.One() + t[i][1] = w + for j := 2; j < len(t[i]); j++ { + t[i][j].Mul(&t[i][j-1], &w) + } + } + wg.Done() + } + + expTable := func(sqrt fr.Element, t []fr.Element) { + t[0] = fr.One() + precomputeExpTable(sqrt, t) + wg.Done() + } + + if nbCosets > 0 { + cosetGens := make([]fr.Element, nbCosets) + cosetGensInv := make([]fr.Element, nbCosets) + cosetGens[0].Set(&d.FinerGenerator) + cosetGensInv[0].Set(&d.FinerGeneratorInv) + for i := 1; i < nbCosets; i++ { + cosetGens[i].Mul(&cosetGens[i-1], &d.FinerGenerator) + cosetGensInv[i].Mul(&cosetGensInv[i-1], &d.FinerGeneratorInv) + } + wg.Add(2 + 2*nbCosets) + go twiddles(d.Twiddles, d.Generator) + go twiddles(d.TwiddlesInv, d.GeneratorInv) + for i := 0; i < nbCosets-1; i++ { + go expTable(cosetGens[i], d.CosetTable[i]) + go expTable(cosetGensInv[i], d.CosetTableInv[i]) + } + go expTable(cosetGens[nbCosets-1], d.CosetTable[nbCosets-1]) + expTable(cosetGensInv[nbCosets-1], d.CosetTableInv[nbCosets-1]) + + wg.Wait() + + } else { + wg.Add(2) + go twiddles(d.Twiddles, 
d.Generator) + twiddles(d.TwiddlesInv, d.GeneratorInv) + wg.Wait() + } + +} + +func precomputeExpTable(w fr.Element, table []fr.Element) { + n := len(table) + + // see if it makes sense to parallelize exp tables pre-computation + interval := 0 + if runtime.NumCPU() >= 4 { + interval = (n - 1) / (runtime.NumCPU() / 4) + } + + // this ratio roughly correspond to the number of multiplication one can do in place of a Exp operation + const ratioExpMul = 6000 / 17 + + if interval < ratioExpMul { + precomputeExpTableChunk(w, 1, table[1:]) + return + } + + // we parallelize + var wg sync.WaitGroup + for i := 1; i < n; i += interval { + start := i + end := i + interval + if end > n { + end = n + } + wg.Add(1) + go func() { + precomputeExpTableChunk(w, uint64(start), table[start:end]) + wg.Done() + }() + } + wg.Wait() +} + +func precomputeExpTableChunk(w fr.Element, power uint64, table []fr.Element) { + + // this condition ensures that creating a domain of size 1 with cosets don't fail + if len(table) > 0 { + table[0].Exp(w, new(big.Int).SetUint64(power)) + for i := 1; i < len(table); i++ { + table[i].Mul(&table[i-1], &w) + } + } +} + +// WriteTo writes a binary representation of the domain (without the precomputed twiddle factors) +// to the provided writer +func (d *Domain) WriteTo(w io.Writer) (int64, error) { + + enc := curve.NewEncoder(w) + + toEncode := []interface{}{d.Cardinality, d.Depth, d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv} + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom attempts to decode a domain from Reader +func (d *Domain) ReadFrom(r io.Reader) (int64, error) { + + dec := curve.NewDecoder(r) + + toDecode := []interface{}{&d.Cardinality, &d.Depth, &d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv} + 
+ for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + d.preComputeTwiddles() + + // store the bit reversed coset tables if needed + if d.Depth > 0 && d.PrecomputeReversedTable == 1 { + d.reverseCosetTables() + } + + return dec.BytesRead(), nil +} diff --git a/ecc/bw6-756/fr/fft/domain_test.go b/ecc/bw6-756/fr/fft/domain_test.go new file mode 100644 index 000000000..df72f0e3a --- /dev/null +++ b/ecc/bw6-756/fr/fft/domain_test.go @@ -0,0 +1,47 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "bytes" + "reflect" + "testing" +) + +func TestDomainSerialization(t *testing.T) { + + domain := NewDomain(1<<6, 1, true) + var reconstructed Domain + + var buf bytes.Buffer + written, err := domain.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + var read int64 + read, err = reconstructed.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + if written != read { + t.Fatal("didn't read as many bytes as we wrote") + } + if !reflect.DeepEqual(domain, &reconstructed) { + t.Fatal("Domain.SetBytes(Bytes()) failed") + } +} diff --git a/ecc/bw6-756/fr/fft/fft.go b/ecc/bw6-756/fr/fft/fft.go new file mode 100644 index 000000000..503f375ba --- /dev/null +++ b/ecc/bw6-756/fr/fft/fft.go @@ -0,0 +1,319 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/bits" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// Decimation is used in the FFT call to select decimation in time or in frequency +type Decimation uint8 + +const ( + DIT Decimation = iota + DIF +) + +// parallelize threshold for a single butterfly op, if the fft stage is not parallelized already +const butterflyThreshold = 16 + +// FFT computes (recursively) the discrete Fourier transform of a and stores the result in a +// if decimation == DIT (decimation in time), the input must be in bit-reversed order +// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order +// coset sets the shift of the fft (0 = no shift, standard fft) +// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. 
+// +// example: +// ------- +// domain := NewDomain(m, 2) --> contains precomputed data for Z/mZ, and Z/4mZ +// FFT(pol, DIT, 1) --> evaluates pol on the coset 1 in (Z/4mZ)/(Z/mZ) +func (domain *Domain) FFT(a []fr.Element, decimation Decimation, coset uint64) { + + numCPU := uint64(runtime.NumCPU()) + + // if coset != 0, scale by coset table + if coset != 0 { + scale := func(cosetTable []fr.Element) { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &cosetTable[i]) + } + }) + } + if decimation == DIT { + if domain.PrecomputeReversedTable == 0 { + // no precomputed coset, we adjust the index of the coset table + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + irev := bits.Reverse64(uint64(i)) >> nn + a[i].Mul(&a[i], &domain.CosetTable[coset-1][int(irev)]) + } + }) + } else { + scale(domain.CosetTableReversed[coset-1]) + } + } else { + scale(domain.CosetTable[coset-1]) + } + } + + // find the stage where we should stop spawning go routines in our recursive calls + // (ie when we have as many go routines running as we have available CPUs) + maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU)) + if numCPU <= 1 { + maxSplits = -1 + } + + switch decimation { + case DIF: + difFFT(a, domain.Twiddles, 0, maxSplits, nil) + case DIT: + ditFFT(a, domain.Twiddles, 0, maxSplits, nil) + default: + panic("not implemented") + } +} + +// FFTInverse computes (recursively) the inverse discrete Fourier transform of a and stores the result in a +// if decimation == DIT (decimation in time), the input must be in bit-reversed order +// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order +// coset sets the shift of the fft (0 = no shift, standard fft) +// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F. 
+func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, coset uint64) { + + numCPU := uint64(runtime.NumCPU()) + + // find the stage where we should stop spawning go routines in our recursive calls + // (ie when we have as many go routines running as we have available CPUs) + maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU)) + if numCPU <= 1 { + maxSplits = -1 + } + switch decimation { + case DIF: + difFFT(a, domain.TwiddlesInv, 0, maxSplits, nil) + case DIT: + ditFFT(a, domain.TwiddlesInv, 0, maxSplits, nil) + default: + panic("not implemented") + } + + // scale by CardinalityInv (+ cosetTableInv is coset!=0) + if coset == 0 { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &domain.CardinalityInv) + } + }) + return + } + + scale := func(cosetTable []fr.Element) { + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + a[i].Mul(&a[i], &cosetTable[i]). + Mul(&a[i], &domain.CardinalityInv) + } + }) + } + if decimation == DIT { + scale(domain.CosetTableInv[coset-1]) + return + } + + // decimation == DIF + if domain.PrecomputeReversedTable != 0 { + scale(domain.CosetTableInvReversed[coset-1]) + return + } + + // no precomputed coset, we adjust the index of the coset table + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + parallel.Execute(len(a), func(start, end int) { + for i := start; i < end; i++ { + irev := bits.Reverse64(uint64(i)) >> nn + a[i].Mul(&a[i], &domain.CosetTableInv[coset-1][int(irev)]). 
+ Mul(&a[i], &domain.CardinalityInv) + } + }) + +} + +func difFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) { + if chDone != nil { + defer close(chDone) + } + + n := len(a) + if n == 1 { + return + } else if n == 8 { + kerDIF8(a, twiddles, stage) + return + } + m := n >> 1 + + // if stage < maxSplits, we parallelize this butterfly + // but we have only numCPU / stage cpus available + if (m > butterflyThreshold) && (stage < maxSplits) { + // 1 << stage == estimated used CPUs + numCPU := runtime.NumCPU() / (1 << (stage)) + parallel.Execute(m, func(start, end int) { + for i := start; i < end; i++ { + fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[stage][i]) + } + }, numCPU) + } else { + // i == 0 + fr.Butterfly(&a[0], &a[m]) + for i := 1; i < m; i++ { + fr.Butterfly(&a[i], &a[i+m]) + a[i+m].Mul(&a[i+m], &twiddles[stage][i]) + } + } + + if m == 1 { + return + } + + nextStage := stage + 1 + if stage < maxSplits { + chDone := make(chan struct{}, 1) + go difFFT(a[m:n], twiddles, nextStage, maxSplits, chDone) + difFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + <-chDone + } else { + difFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + difFFT(a[m:n], twiddles, nextStage, maxSplits, nil) + } + +} + +func ditFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) { + if chDone != nil { + defer close(chDone) + } + n := len(a) + if n == 1 { + return + } else if n == 8 { + kerDIT8(a, twiddles, stage) + return + } + m := n >> 1 + + nextStage := stage + 1 + + if stage < maxSplits { + // that's the only time we fire go routines + chDone := make(chan struct{}, 1) + go ditFFT(a[m:], twiddles, nextStage, maxSplits, chDone) + ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + <-chDone + } else { + ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil) + ditFFT(a[m:n], twiddles, nextStage, maxSplits, nil) + + } + + // if stage < maxSplits, we parallelize this butterfly + // but we have only 
numCPU / stage cpus available + if (m > butterflyThreshold) && (stage < maxSplits) { + // 1 << stage == estimated used CPUs + numCPU := runtime.NumCPU() / (1 << (stage)) + parallel.Execute(m, func(start, end int) { + for k := start; k < end; k++ { + a[k+m].Mul(&a[k+m], &twiddles[stage][k]) + fr.Butterfly(&a[k], &a[k+m]) + } + }, numCPU) + + } else { + fr.Butterfly(&a[0], &a[m]) + for k := 1; k < m; k++ { + a[k+m].Mul(&a[k+m], &twiddles[stage][k]) + fr.Butterfly(&a[k], &a[k+m]) + } + } +} + +// BitReverse applies the bit-reversal permutation to a. +// len(a) must be a power of 2 (as in every single function in this file) +func BitReverse(a []fr.Element) { + n := uint64(len(a)) + nn := uint64(64 - bits.TrailingZeros64(n)) + + for i := uint64(0); i < n; i++ { + irev := bits.Reverse64(i) >> nn + if irev > i { + a[i], a[irev] = a[irev], a[i] + } + } +} + +// kerDIT8 is a kernel that process a FFT of size 8 +func kerDIT8(a []fr.Element, twiddles [][]fr.Element, stage int) { + + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) + fr.Butterfly(&a[0], &a[2]) + a[3].Mul(&a[3], &twiddles[stage+1][1]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + a[7].Mul(&a[7], &twiddles[stage+1][1]) + fr.Butterfly(&a[5], &a[7]) + fr.Butterfly(&a[0], &a[4]) + a[5].Mul(&a[5], &twiddles[stage+0][1]) + fr.Butterfly(&a[1], &a[5]) + a[6].Mul(&a[6], &twiddles[stage+0][2]) + fr.Butterfly(&a[2], &a[6]) + a[7].Mul(&a[7], &twiddles[stage+0][3]) + fr.Butterfly(&a[3], &a[7]) +} + +// kerDIF8 is a kernel that process a FFT of size 8 +func kerDIF8(a []fr.Element, twiddles [][]fr.Element, stage int) { + + fr.Butterfly(&a[0], &a[4]) + fr.Butterfly(&a[1], &a[5]) + fr.Butterfly(&a[2], &a[6]) + fr.Butterfly(&a[3], &a[7]) + a[5].Mul(&a[5], &twiddles[stage+0][1]) + a[6].Mul(&a[6], &twiddles[stage+0][2]) + a[7].Mul(&a[7], &twiddles[stage+0][3]) + fr.Butterfly(&a[0], &a[2]) + fr.Butterfly(&a[1], &a[3]) + fr.Butterfly(&a[4], &a[6]) + 
fr.Butterfly(&a[5], &a[7]) + a[3].Mul(&a[3], &twiddles[stage+1][1]) + a[7].Mul(&a[7], &twiddles[stage+1][1]) + fr.Butterfly(&a[0], &a[1]) + fr.Butterfly(&a[2], &a[3]) + fr.Butterfly(&a[4], &a[5]) + fr.Butterfly(&a[6], &a[7]) +} diff --git a/ecc/bw6-756/fr/fft/fft_test.go b/ecc/bw6-756/fr/fft/fft_test.go new file mode 100644 index 000000000..4748e01b9 --- /dev/null +++ b/ecc/bw6-756/fr/fft/fft_test.go @@ -0,0 +1,415 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/big" + "strconv" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" +) + +func TestFFT(t *testing.T) { + const maxSize = 1 << 10 + + nbCosets := 3 + domainWithPrecompute := NewDomain(maxSize, 2, true) + domainWOPrecompute := NewDomain(maxSize, 2, false) + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 5 + + properties := gopter.NewProperties(parameters) + + properties.Property("DIF FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 0) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets with precomputed values should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). 
+ Mul(&sample, &domainWithPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets W/O precompute should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWOPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). + Mul(&sample, &domainWOPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIT FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + domainWithPrecompute.FFTInverse(pol, DIF, 0) + 
BitReverse(pol) + + check := true + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + return check + }, + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, uint64(i)) + domainWithPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWOPrecompute.FFT(pol, DIT, uint64(i)) + domainWOPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 0) + domainWithPrecompute.FFT(pol, DIT, 0) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + 
backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 1) + domainWithPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFTInverse(pol, DIF, 1) + domainWOPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// -------------------------------------------------------------------- +// benches +func BenchmarkBitReverse(b *testing.B) { + + const maxSize = 1 << 20 + + pol := make([]fr.Element, maxSize) + pol[0].SetRandom() + for i := 1; i < maxSize; i++ { + pol[i] = pol[i-1] + } + + for i := 8; i < 20; i++ { + b.Run("bit reversing 2**"+strconv.Itoa(i)+"bits", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + BitReverse(pol[:1< (1 << 15) { + size = 1 << 15 + } + paddedSize := ecc.NextPowerOfTwo(uint64(size)) + p1 := make([]fr.Element, paddedSize) + p2 := make([]fr.Element, paddedSize) + for i := 0; i < len(p1); i++ { + p1[i].SetRawBytes(r) + } + copy(p2, p1) + + // fft domain + nbCosets := uint64(uint8(data[0]) % 3) + domainWithPrecompute := NewDomain(paddedSize, nbCosets, true) + domainWOPrecompute := NewDomain(paddedSize, nbCosets, false) + + // bitReverse(DIF FFT(DIT FFT (bitReverse))))==id + for i := uint64(0); i < nbCosets; i++ { + BitReverse(p1) + domainWithPrecompute.FFT(p1, DIT, i) + domainWOPrecompute.FFTInverse(p1, DIF, i) + BitReverse(p1) + + for i := 0; i < 
len(p1); i++ { + if !p1[i].Equal(&p2[i]) { + panic(fmt.Sprintf("bitReverse(DIF FFT(DIT FFT (bitReverse)))) != id, size %d", size)) + } + } + } + + return fuzzNormal +} diff --git a/ecc/bw6-756/fr/fft/fuzz_test.go b/ecc/bw6-756/fr/fft/fuzz_test.go new file mode 100644 index 000000000..9890547c0 --- /dev/null +++ b/ecc/bw6-756/fr/fft/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bw6-756/fr/kzg/doc.go b/ecc/bw6-756/fr/kzg/doc.go new file mode 100644 index 000000000..d8a77e8f6 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package kzg provides a KZG commitment scheme. +package kzg diff --git a/ecc/bw6-756/fr/kzg/fuzz.go b/ecc/bw6-756/fr/kzg/fuzz.go new file mode 100644 index 000000000..de1704a8a --- /dev/null +++ b/ecc/bw6-756/fr/kzg/fuzz.go @@ -0,0 +1,84 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + if len(data) == 0 { + return fuzzDiscard + } + size := int(uint8(data[0])) + 2 // TODO fix min size in NewScheme + if size > (1 << 15) { + size = 1 << 15 + } + r := bytes.NewReader(data[1:]) + var alpha, point fr.Element + alpha.SetRawBytes(r) + point.SetRawBytes(r) + s := NewScheme(size, alpha) + + // create polynomials + f := make([]polynomial.Polynomial, size/2) + for i := 0; i < len(f); i++ { + f[i] = make(polynomial.Polynomial, size) + for j := 0; j < len(f[i]); j++ { + f[i][j].SetRawBytes(r) + } + } + + // commit the polynomials + digests := make([]Digest, size/2) + for i := 0; i < len(digests); i++ { + digests[i], _ = s.Commit(f[i]) + + } + + proof, err := s.BatchOpenSinglePoint(&point, digests, f) + if err != nil { + panic(err) + } + + // verify the claimed values + for i := 0; i < len(f); i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + panic("inconsistant claimed values") + } + } + + // verify correct proof + err = s.BatchVerifySinglePoint(digests, &proof) + if err != nil { + panic(err) + } + + return fuzzNormal +} diff --git a/ecc/bw6-756/fr/kzg/fuzz_test.go b/ecc/bw6-756/fr/kzg/fuzz_test.go new file mode 100644 index 000000000..8379a59c7 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bw6-756/fr/kzg/kzg.go b/ecc/bw6-756/fr/kzg/kzg.go new file mode 100644 index 000000000..0ff86eff1 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/kzg.go @@ -0,0 +1,518 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "errors" + "hash" + "math/big" + "sync" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" + "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrInvalidNbDigests = errors.New("number of digests is not the same as the number of polynomials") + ErrInvalidPolynomialSize = errors.New("invalid polynomial size (larger than SRS or == 0)") + ErrVerifyOpeningProof = errors.New("can't verify opening proof") + ErrVerifyBatchOpeningSinglePoint = errors.New("can't verify batch opening proof at single point") + ErrMinSRSSize = errors.New("minimum srs size is 2") +) + +// Digest commitment of a polynomial. +type Digest = bw6756.G1Affine + +// SRS stores the result of the MPC +type SRS struct { + G1 []bw6756.G1Affine // [gen [alpha]gen , [alpha**2]gen, ... ] + G2 [2]bw6756.G2Affine // [gen, [alpha]gen ] +} + +// NewSRS returns a new SRS using alpha as randomness source +// +// In production, a SRS generated through MPC should be used. 
+// +// implements io.ReaderFrom and io.WriterTo +func NewSRS(size uint64, bAlpha *big.Int) (*SRS, error) { + if size < 2 { + return nil, ErrMinSRSSize + } + var srs SRS + srs.G1 = make([]bw6756.G1Affine, size) + + var alpha fr.Element + alpha.SetBigInt(bAlpha) + + _, _, gen1Aff, gen2Aff := bw6756.Generators() + srs.G1[0] = gen1Aff + srs.G2[0] = gen2Aff + srs.G2[1].ScalarMultiplication(&gen2Aff, bAlpha) + + alphas := make([]fr.Element, size-1) + alphas[0] = alpha + for i := 1; i < len(alphas); i++ { + alphas[i].Mul(&alphas[i-1], &alpha) + } + for i := 0; i < len(alphas); i++ { + alphas[i].FromMont() + } + g1s := bw6756.BatchScalarMultiplicationG1(&gen1Aff, alphas) + copy(srs.G1[1:], g1s) + + return &srs, nil +} + +// OpeningProof KZG proof for opening at a single point. +// +// implements io.ReaderFrom and io.WriterTo +type OpeningProof struct { + // H quotient polynomial (f - f(z))/(x-z) + H bw6756.G1Affine + + // Point at which the polynomial is evaluated + Point fr.Element + + // ClaimedValue purported value + ClaimedValue fr.Element +} + +// BatchOpeningProof opening proof for many polynomials at the same point +// +// implements io.ReaderFrom and io.WriterTo +type BatchOpeningProof struct { + // H quotient polynomial Sum_i gamma**i*(f - f(z))/(x-z) + H bw6756.G1Affine + + // Point at which the polynomials are evaluated + Point fr.Element + + // ClaimedValues purported values + ClaimedValues []fr.Element +} + +// Commit commits to a polynomial using a multi exponentiation with the SRS. +// It is assumed that the polynomial is in canonical form, in Montgomery form. 
+func Commit(p polynomial.Polynomial, srs *SRS, nbTasks ...int) (Digest, error) { + + if len(p) == 0 || len(p) > len(srs.G1) { + return Digest{}, ErrInvalidPolynomialSize + } + + var res bw6756.G1Affine + + config := ecc.MultiExpConfig{ScalarsMont: true} + if len(nbTasks) > 0 { + config.NbTasks = nbTasks[0] + } + if _, err := res.MultiExp(srs.G1[:len(p)], p, config); err != nil { + return Digest{}, err + } + + return res, nil +} + +// Open computes an opening proof of polynomial p at given point. +// fft.Domain Cardinality must be larger than p.Degree() +func Open(p polynomial.Polynomial, point *fr.Element, domain *fft.Domain, srs *SRS) (OpeningProof, error) { + if len(p) == 0 || len(p) > len(srs.G1) { + return OpeningProof{}, ErrInvalidPolynomialSize + } + + // build the proof + res := OpeningProof{ + Point: *point, + ClaimedValue: p.Eval(point), + } + + // compute H + _p := make(polynomial.Polynomial, len(p)) + copy(_p, p) + h := dividePolyByXminusA(_p, res.ClaimedValue, res.Point) + + _p = nil // h re-use this memory + + // commit to H + hCommit, err := Commit(h, srs) + if err != nil { + return OpeningProof{}, err + } + res.H.Set(&hCommit) + + return res, nil +} + +// Verify verifies a KZG opening proof at a single point +func Verify(commitment *Digest, proof *OpeningProof, srs *SRS) error { + + // comm(f(a)) + var claimedValueG1Aff bw6756.G1Affine + var claimedValueBigInt big.Int + proof.ClaimedValue.ToBigIntRegular(&claimedValueBigInt) + claimedValueG1Aff.ScalarMultiplication(&srs.G1[0], &claimedValueBigInt) + + // [f(alpha) - f(a)]G1Jac + var fminusfaG1Jac, tmpG1Jac bw6756.G1Jac + fminusfaG1Jac.FromAffine(commitment) + tmpG1Jac.FromAffine(&claimedValueG1Aff) + fminusfaG1Jac.SubAssign(&tmpG1Jac) + + // [-H(alpha)]G1Aff + var negH bw6756.G1Affine + negH.Neg(&proof.H) + + // [alpha-a]G2Jac + var alphaMinusaG2Jac, genG2Jac, alphaG2Jac bw6756.G2Jac + var pointBigInt big.Int + proof.Point.ToBigIntRegular(&pointBigInt) + genG2Jac.FromAffine(&srs.G2[0]) + 
alphaG2Jac.FromAffine(&srs.G2[1]) + alphaMinusaG2Jac.ScalarMultiplication(&genG2Jac, &pointBigInt). + Neg(&alphaMinusaG2Jac). + AddAssign(&alphaG2Jac) + + // [alpha-a]G2Aff + var xminusaG2Aff bw6756.G2Affine + xminusaG2Aff.FromJacobian(&alphaMinusaG2Jac) + + // [f(alpha) - f(a)]G1Aff + var fminusfaG1Aff bw6756.G1Affine + fminusfaG1Aff.FromJacobian(&fminusfaG1Jac) + + // e([-H(alpha)]G1Aff, G2gen).e([-H(alpha)]G1Aff, [alpha-a]G2Aff) ==? 1 + check, err := bw6756.PairingCheck( + []bw6756.G1Affine{fminusfaG1Aff, negH}, + []bw6756.G2Affine{srs.G2[0], xminusaG2Aff}, + ) + if err != nil { + return err + } + if !check { + return ErrVerifyOpeningProof + } + return nil +} + +// BatchOpenSinglePoint creates a batch opening proof at _val of a list of polynomials. +// It's an interactive protocol, made non interactive using Fiat Shamir. +// point is the point at which the polynomials are opened. +// digests is the list of committed polynomials to open, need to derive the challenge using Fiat Shamir. +// polynomials is the list of polynomials to open. 
+func BatchOpenSinglePoint(polynomials []polynomial.Polynomial, digests []Digest, point *fr.Element, hf hash.Hash, domain *fft.Domain, srs *SRS) (BatchOpeningProof, error) { + + // check for invalid sizes + nbDigests := len(digests) + if nbDigests != len(polynomials) { + return BatchOpeningProof{}, ErrInvalidNbDigests + } + largestPoly := -1 + for _, p := range polynomials { + if len(p) == 0 || len(p) > len(srs.G1) { + return BatchOpeningProof{}, ErrInvalidPolynomialSize + } + if len(p) > largestPoly { + largestPoly = len(p) + } + } + + var res BatchOpeningProof + + // compute the purported values + res.ClaimedValues = make([]fr.Element, len(polynomials)) + var wg sync.WaitGroup + wg.Add(len(polynomials)) + for i := 0; i < len(polynomials); i++ { + go func(at int) { + res.ClaimedValues[at] = polynomials[at].Eval(point) + wg.Done() + }(i) + } + + // set the point at which the evaluation is done + res.Point = *point + + // derive the challenge gamma, binded to the point and the commitments + gamma, err := deriveGamma(res.Point, digests, hf) + if err != nil { + return BatchOpeningProof{}, err + } + + // compute sum_i gamma**i*f(a) + var sumGammaiTimesEval fr.Element + chSumGammai := make(chan struct{}, 1) + go func() { + // wait for polynomial evaluations to be completed (res.ClaimedValues) + wg.Wait() + sumGammaiTimesEval = res.ClaimedValues[nbDigests-1] + for i := nbDigests - 2; i >= 0; i-- { + sumGammaiTimesEval.Mul(&sumGammaiTimesEval, &gamma). + Add(&sumGammaiTimesEval, &res.ClaimedValues[i]) + } + close(chSumGammai) + }() + + // compute sum_i gamma**i*f + // that is p0 + gamma * p1 + gamma^2 * p2 + ... 
gamma^n * pn
+	// note: if we are willing to parallelize that, we could clone the poly and scale them by
+	// gamma^n in parallel, before reducing into sumGammaiTimesPol
+	sumGammaiTimesPol := make(polynomial.Polynomial, largestPoly)
+	copy(sumGammaiTimesPol, polynomials[0])
+	gammaN := gamma
+	var pj fr.Element
+	for i := 1; i < len(polynomials); i++ {
+		for j := 0; j < len(polynomials[i]); j++ {
+			pj.Mul(&polynomials[i][j], &gammaN)
+			sumGammaiTimesPol[j].Add(&sumGammaiTimesPol[j], &pj)
+		}
+		gammaN.Mul(&gammaN, &gamma)
+	}
+
+	// compute H
+	<-chSumGammai
+	h := dividePolyByXminusA(sumGammaiTimesPol, sumGammaiTimesEval, res.Point)
+	sumGammaiTimesPol = nil // same memory as h
+
+	res.H, err = Commit(h, srs)
+	if err != nil {
+		return BatchOpeningProof{}, err
+	}
+
+	return res, nil
+}
+
+// FoldProof folds the digests and the proofs in batchOpeningProof using Fiat Shamir
+// to obtain an opening proof at a single point.
+//
+// * digests list of digests on which batchOpeningProof is based
+// * batchOpeningProof opening proof of digests
+// * returns the folded version of batchOpeningProof, Digest, the folded version of digests
+func FoldProof(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash) (OpeningProof, Digest, error) {
+
+	nbDigests := len(digests)
+
+	// check consistency between the number of claims and the number of digests
+	if nbDigests != len(batchOpeningProof.ClaimedValues) {
+		return OpeningProof{}, Digest{}, ErrInvalidNbDigests
+	}
+
+	// derive the challenge gamma, bound to the point and the commitments
+	gamma, err := deriveGamma(batchOpeningProof.Point, digests, hf)
+	if err != nil {
+		return OpeningProof{}, Digest{}, ErrInvalidNbDigests
+	}
+
+	// fold the claimed values and digests
+	gammai := make([]fr.Element, nbDigests)
+	gammai[0].SetOne()
+	for i := 1; i < nbDigests; i++ {
+		gammai[i].Mul(&gammai[i-1], &gamma)
+	}
+	foldedDigests, foldedEvaluations, err := fold(digests, batchOpeningProof.ClaimedValues, gammai)
+	if err != nil
{
+		return OpeningProof{}, Digest{}, err
+	}
+
+	// create the folded opening proof
+	var res OpeningProof
+	res.ClaimedValue.Set(&foldedEvaluations)
+	res.H.Set(&batchOpeningProof.H)
+	res.Point.Set(&batchOpeningProof.Point)
+
+	return res, foldedDigests, nil
+}
+
+// BatchVerifySinglePoint verifies a batched opening proof at a single point of a list of polynomials.
+//
+// * digests list of digests on which opening proof is done
+// * batchOpeningProof proof of correct opening on the digests
+func BatchVerifySinglePoint(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash, srs *SRS) error {
+
+	// fold the proof
+	foldedProof, foldedDigest, err := FoldProof(digests, batchOpeningProof, hf)
+	if err != nil {
+		return err
+	}
+
+	// verify the foldedProof against the foldedDigest
+	err = Verify(&foldedDigest, &foldedProof, srs)
+	return err
+
+}
+
+// BatchVerifyMultiPoints batch verifies a list of opening proofs at different points.
+// The purpose of the batching is to have only one pairing for verifying several proofs.
+//
+// * digests list of committed polynomials which are opened
+// * proofs list of opening proofs of the digest
+func BatchVerifyMultiPoints(digests []Digest, proofs []OpeningProof, srs *SRS) error {
+
+	// check consistency: the number of proofs must match the number of digests
+	if len(digests) != len(proofs) {
+		return ErrInvalidNbDigests
+	}
+
+	// if only one digest, call Verify
+	if len(digests) == 1 {
+		return Verify(&digests[0], &proofs[0], srs)
+	}
+
+	// sample random numbers for the random linear combination of the proofs
+	randomNumbers := make([]fr.Element, len(digests))
+	randomNumbers[0].SetOne()
+	for i := 1; i < len(randomNumbers); i++ {
+		_, err := randomNumbers[i].SetRandom()
+		if err != nil {
+			return err
+		}
+	}
+
+	// combine random_i*quotient_i
+	var foldedQuotients bw6756.G1Affine
+	quotients := make([]bw6756.G1Affine, len(proofs))
+	for i := 0; i < len(randomNumbers); i++ {
+		quotients[i].Set(&proofs[i].H)
+	}
+	config := ecc.MultiExpConfig{ScalarsMont: true}
+	_, err := foldedQuotients.MultiExp(quotients, randomNumbers, config)
+	if err != nil {
+		// NOTE(review): this swallows the MultiExp error — should this be 'return err'?
+		return nil
+	}
+
+	// fold digests and evals
+	evals := make([]fr.Element, len(digests))
+	for i := 0; i < len(randomNumbers); i++ {
+		evals[i].Set(&proofs[i].ClaimedValue)
+	}
+	foldedDigests, foldedEvals, err := fold(digests, evals, randomNumbers)
+	if err != nil {
+		return err
+	}
+
+	// compute commitment to folded Eval
+	var foldedEvalsCommit bw6756.G1Affine
+	var foldedEvalsBigInt big.Int
+	foldedEvals.ToBigIntRegular(&foldedEvalsBigInt)
+	foldedEvalsCommit.ScalarMultiplication(&srs.G1[0], &foldedEvalsBigInt)
+
+	// compute F = foldedDigests - foldedEvalsCommit
+	foldedDigests.Sub(&foldedDigests, &foldedEvalsCommit)
+
+	// combine random_i*(point_i*quotient_i)
+	var foldedPointsQuotients bw6756.G1Affine
+	for i := 0; i < len(randomNumbers); i++ {
+		randomNumbers[i].Mul(&randomNumbers[i], &proofs[i].Point)
+	}
+	_, err = foldedPointsQuotients.MultiExp(quotients, randomNumbers, config)
+	if err != nil {
+		return err
+	}
+
+	// lhs first pairing
+	
foldedDigests.Add(&foldedDigests, &foldedPointsQuotients)
+
+	// lhs second pairing
+	foldedQuotients.Neg(&foldedQuotients)
+
+	// pairing check
+	check, err := bw6756.PairingCheck(
+		[]bw6756.G1Affine{foldedDigests, foldedQuotients},
+		[]bw6756.G2Affine{srs.G2[0], srs.G2[1]},
+	)
+	if err != nil {
+		return err
+	}
+	if !check {
+		return ErrVerifyOpeningProof
+	}
+	return nil
+
+}
+
+// fold folds digests and evaluations using the list of factors as random numbers.
+//
+// * digests list of digests to fold
+// * evaluations list of evaluations to fold
+// * factors list of multiplicative factors used for the folding (in Montgomery form)
+func fold(digests []Digest, evaluations []fr.Element, factors []fr.Element) (Digest, fr.Element, error) {
+
+	// checking length consistency between digests and evaluations is the caller's responsibility
+	nbDigests := len(digests)
+
+	// fold the claimed values
+	var foldedEvaluations, tmp fr.Element
+	for i := 0; i < nbDigests; i++ {
+		tmp.Mul(&evaluations[i], &factors[i])
+		foldedEvaluations.Add(&foldedEvaluations, &tmp)
+	}
+
+	// fold the digests
+	var foldedDigests Digest
+	_, err := foldedDigests.MultiExp(digests, factors, ecc.MultiExpConfig{ScalarsMont: true})
+	if err != nil {
+		return foldedDigests, foldedEvaluations, err
+	}
+
+	// folding done
+	return foldedDigests, foldedEvaluations, nil
+
+}
+
+// deriveGamma derives a challenge using Fiat Shamir to fold proofs.
+func deriveGamma(point fr.Element, digests []Digest, hf hash.Hash) (fr.Element, error) {
+
+	// derive the challenge gamma, bound to the point and the commitments
+	fs := fiatshamir.NewTranscript(hf, "gamma")
+	if err := fs.Bind("gamma", point.Marshal()); err != nil {
+		return fr.Element{}, err
+	}
+	for i := 0; i < len(digests); i++ {
+		if err := fs.Bind("gamma", digests[i].Marshal()); err != nil {
+			return fr.Element{}, err
+		}
+	}
+	gammaByte, err := fs.ComputeChallenge("gamma")
+	if err != nil {
+		return fr.Element{}, err
+	}
+	var gamma fr.Element
+	gamma.SetBytes(gammaByte)
+
+	return gamma, nil
+}
+
+// dividePolyByXminusA computes (f-f(a))/(x-a), in canonical basis, in regular form
+// f memory is re-used for the result
+func dividePolyByXminusA(f polynomial.Polynomial, fa, a fr.Element) polynomial.Polynomial {
+
+	// first we compute f-f(a)
+	f[0].Sub(&f[0], &fa)
+
+	// now we use synthetic division to divide by x-a
+	var t fr.Element
+	for i := len(f) - 2; i >= 0; i-- {
+		t.Mul(&f[i+1], &a)
+
+		f[i].Add(&f[i], &t)
+	}
+
+	// the result is of degree deg(f)-1
+	return f[1:]
+}
diff --git a/ecc/bw6-756/fr/kzg/kzg_test.go b/ecc/bw6-756/fr/kzg/kzg_test.go
new file mode 100644
index 000000000..9e0757166
--- /dev/null
+++ b/ecc/bw6-756/fr/kzg/kzg_test.go
@@ -0,0 +1,453 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "crypto/sha256" + "math/big" + "reflect" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" +) + +// testSRS re-used accross tests of the KZG scheme +var testSRS *SRS + +func init() { + const srsSize = 230 + testSRS, _ = NewSRS(ecc.NextPowerOfTwo(srsSize), new(big.Int).SetInt64(42)) +} + +func TestDividePolyByXminusA(t *testing.T) { + + const pSize = 230 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + + // evaluate the polynomial at a random point + var point fr.Element + point.SetRandom() + evaluation := pol.Eval(&point) + + // probabilistic test (using Schwartz Zippel lemma, evaluation at one point is enough) + var randPoint, xminusa fr.Element + randPoint.SetRandom() + polRandpoint := pol.Eval(&randPoint) + polRandpoint.Sub(&polRandpoint, &evaluation) // f(rand)-f(point) + + // compute f-f(a)/x-a + h := dividePolyByXminusA(pol, evaluation, point) + pol = nil // h reuses this memory + + if len(h) != 229 { + t.Fatal("inconsistant size of quotient") + } + + hRandPoint := h.Eval(&randPoint) + xminusa.Sub(&randPoint, &point) // rand-point + + // f(rand)-f(point) ==? h(rand)*(rand-point) + hRandPoint.Mul(&hRandPoint, &xminusa) + + if !hRandPoint.Equal(&polRandpoint) { + t.Fatal("Error f-f(a)/x-a") + } +} + +func TestSerializationSRS(t *testing.T) { + + // create a SRS + srs, err := NewSRS(64, new(big.Int).SetInt64(42)) + if err != nil { + t.Fatal(err) + } + + // serialize it... 
+ var buf bytes.Buffer + _, err = srs.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + + // reconstruct the SRS + var _srs SRS + _, err = _srs.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + // compare + if !reflect.DeepEqual(srs, &_srs) { + t.Fatal("scheme serialization failed") + } + +} + +func TestCommit(t *testing.T) { + + // create a polynomial + f := make(polynomial.Polynomial, 60) + for i := 0; i < 60; i++ { + f[i].SetRandom() + } + + // commit using the method from KZG + _kzgCommit, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + var kzgCommit bw6756.G1Affine + kzgCommit.Unmarshal(_kzgCommit.Marshal()) + + // check commitment using manual commit + var x fr.Element + x.SetString("42") + fx := f.Eval(&x) + var fxbi big.Int + fx.ToBigIntRegular(&fxbi) + var manualCommit bw6756.G1Affine + manualCommit.Set(&testSRS.G1[0]) + manualCommit.ScalarMultiplication(&manualCommit, &fxbi) + + // compare both results + if !kzgCommit.Equal(&manualCommit) { + t.Fatal("error KZG commitment") + } + +} + +func TestVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create a polynomial + f := randomPolynomial(60) + + // commit the polynomial + digest, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := Open(f, &point, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed valued + expected := f.Eval(&point) + if !proof.ClaimedValue.Equal(&expected) { + t.Fatal("inconsistant claimed value") + } + + // verify correct proof + err = Verify(&digest, &proof, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValue.Double(&proof.ClaimedValue) + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } +} + +func TestBatchVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 0, 
false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + + } + + // pick a hash function + hf := sha256.New() + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := BatchOpenSinglePoint(f, digests, &point, hf, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed values + for i := 0; i < 10; i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + t.Fatal("inconsistant claimed values") + } + } + + // verify correct proof + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + +} + +func TestBatchVerifyMultiPoints(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + } + + // pick a hash function + hf := sha256.New() + + // compute 2 batch opening proofs at 2 random points + points := make([]fr.Element, 2) + batchProofs := make([]BatchOpeningProof, 2) + points[0].SetRandom() + batchProofs[0], _ = BatchOpenSinglePoint(f[:5], digests[:5], &points[0], hf, domain, testSRS) + points[1].SetRandom() + batchProofs[1], _ = BatchOpenSinglePoint(f[5:], digests[5:], &points[1], hf, domain, testSRS) + + // fold the 2 batch opening proofs + proofs := make([]OpeningProof, 2) + foldedDigests := make([]Digest, 2) + 
proofs[0], foldedDigests[0], _ = FoldProof(digests[:5], &batchProofs[0], hf) + proofs[1], foldedDigests[1], _ = FoldProof(digests[5:], &batchProofs[1], hf) + + // check the the individual batch proofs are correct + err := Verify(&foldedDigests[0], &proofs[0], testSRS) + if err != nil { + t.Fatal(err) + } + err = Verify(&foldedDigests[1], &proofs[1], testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify correct folded proofs + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify tampered folded proofs + proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } + +} + +const benchSize = 1 << 16 + +func BenchmarkKZGCommit(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // random polynomial + p := randomPolynomial(benchSize / 2) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Commit(p, benchSRS) + } +} + +func BenchmarkDivideByXMinusA(b *testing.B) { + const pSize = 1 << 22 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + var a, fa fr.Element + a.SetRandom() + fa.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dividePolyByXminusA(pol, fa, a) + pol = pol[:pSize] + pol[pSize-1] = pol[0] + } +} + +func BenchmarkKZGOpen(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Open(p, &r, domain, benchSRS) + } +} + +func BenchmarkKZGVerify(b *testing.B) { + benchSRS, err := 
NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // kzg scheme + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + // commit + comm, err := Commit(p, benchSRS) + if err != nil { + b.Fatal(err) + } + + // open + openingProof, err := Open(p, &r, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Verify(&comm, &openingProof, benchSRS) + } +} + +func BenchmarkKZGBatchOpen10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + } +} + +func BenchmarkKZGBatchVerify10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + proof, err := BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; 
i++ { + BatchVerifySinglePoint(commitments[:], &proof, hf, benchSRS) + } +} + +func randomPolynomial(size int) polynomial.Polynomial { + f := make(polynomial.Polynomial, size) + for i := 0; i < size; i++ { + f[i].SetRandom() + } + return f +} diff --git a/ecc/bw6-756/fr/kzg/marshal.go b/ecc/bw6-756/fr/kzg/marshal.go new file mode 100644 index 000000000..a79315f91 --- /dev/null +++ b/ecc/bw6-756/fr/kzg/marshal.go @@ -0,0 +1,138 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "io" +) + +// WriteTo writes binary encoding of the SRS +func (srs *SRS) WriteTo(w io.Writer) (int64, error) { + // encode the SRS + enc := bw6756.NewEncoder(w) + + toEncode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + srs.G1, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes SRS data from reader. 
+func (srs *SRS) ReadFrom(r io.Reader) (int64, error) { + // decode the SRS + dec := bw6756.NewDecoder(r) + + toDecode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + &srs.G1, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a OpeningProof +func (proof *OpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bw6756.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes OpeningProof data from reader. +func (proof *OpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bw6756.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a BatchOpeningProof +func (proof *BatchOpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bw6756.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + proof.ClaimedValues, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes BatchOpeningProof data from reader. 
+func (proof *BatchOpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bw6756.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValues, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} diff --git a/ecc/bw6-756/fr/mimc/doc.go b/ecc/bw6-756/fr/mimc/doc.go new file mode 100644 index 000000000..497bd40a9 --- /dev/null +++ b/ecc/bw6-756/fr/mimc/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package mimc provides MiMC hash function using Miyaguchi–Preneel construction. +package mimc diff --git a/ecc/bw6-756/fr/mimc/fuzz.go b/ecc/bw6-756/fr/mimc/fuzz.go new file mode 100644 index 000000000..41b557cf3 --- /dev/null +++ b/ecc/bw6-756/fr/mimc/fuzz.go @@ -0,0 +1,34 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + var s []byte + h := NewMiMC(string(data)) + h.Write(data) + h.Sum(s) + return fuzzNormal +} diff --git a/ecc/bw6-756/fr/mimc/mimc.go b/ecc/bw6-756/fr/mimc/mimc.go new file mode 100644 index 000000000..e0f90b33b --- /dev/null +++ b/ecc/bw6-756/fr/mimc/mimc.go @@ -0,0 +1,174 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+package mimc
+
+import (
+	"hash"
+	"math/big"
+
+	"github.com/consensys/gnark-crypto/ecc/bw6-756/fr"
+	"golang.org/x/crypto/sha3"
+)
+
+const mimcNbRounds = 91
+
+// BlockSize size that mimc consumes
+const BlockSize = fr.Bytes
+
+// Params constants for the mimc hash function
+type Params []fr.Element
+
+// NewParams derives the round constants of the mimc function from seed
+func NewParams(seed string) Params {
+
+	// set the constants
+	res := make(Params, mimcNbRounds)
+
+	rnd := sha3.Sum256([]byte(seed))
+	value := new(big.Int).SetBytes(rnd[:])
+
+	for i := 0; i < mimcNbRounds; i++ {
+		rnd = sha3.Sum256(value.Bytes())
+		value.SetBytes(rnd[:])
+		res[i].SetBigInt(value)
+	}
+
+	return res
+}
+
+// digest represents the partial evaluation of the checksum
+// along with the params of the mimc function
+type digest struct {
+	Params Params
+	h      fr.Element
+	data   []byte // data to hash
+}
+
+// NewMiMC returns a MiMCImpl object, pure-go reference implementation
+func NewMiMC(seed string) hash.Hash {
+	d := new(digest)
+	params := NewParams(seed)
+	//d.Reset()
+	d.Params = params
+	d.Reset()
+	return d
+}
+
+// Reset resets the Hash to its initial state.
+func (d *digest) Reset() {
+	d.data = nil
+	d.h = fr.Element{0, 0, 0, 0}
+}
+
+// Sum appends the current hash to b and returns the resulting slice.
+// It does not change the underlying hash state.
+func (d *digest) Sum(b []byte) []byte {
+	buffer := d.checksum()
+	d.data = nil // flush the data already hashed
+	hash := buffer.Bytes()
+	b = append(b, hash[:]...)
+	return b
+}
+
+// Size returns the number of bytes Sum will return.
+func (d *digest) Size() int {
+	return BlockSize
+}
+
+// BlockSize returns the hash's underlying block size.
+// The Write method must be able to accept any amount
+// of data, but it may operate more efficiently if all writes
+// are a multiple of the block size.
+func (d *digest) BlockSize() int {
+	return BlockSize
+}
+
+// Write (via the embedded io.Writer interface) adds more data to the running hash.
+// It never returns an error.
+func (d *digest) Write(p []byte) (n int, err error) {
+	n = len(p)
+	d.data = append(d.data, p...)
+	return
+}
+
+// Hash hash using Miyaguchi–Preneel:
+// https://en.wikipedia.org/wiki/One-way_compression_function
+// The XOR operation is replaced by field addition, data is in Montgomery form
+func (d *digest) checksum() fr.Element {
+
+	var buffer [BlockSize]byte
+	var x fr.Element
+
+	// if data size is not multiple of BlockSizes we padd:
+	// .. || 0xaf8 -> .. || 0x0000...0af8
+	if len(d.data)%BlockSize != 0 {
+		q := len(d.data) / BlockSize
+		r := len(d.data) % BlockSize
+		sliceq := make([]byte, q*BlockSize)
+		copy(sliceq, d.data)
+		slicer := make([]byte, r)
+		copy(slicer, d.data[q*BlockSize:])
+		sliceremainder := make([]byte, BlockSize-r)
+		d.data = append(sliceq, sliceremainder...)
+		d.data = append(d.data, slicer...)
+	}
+
+	// empty message: hash one zero block (BlockSize = fr.Bytes, not 32 for this field)
+	if len(d.data) == 0 {
+		d.data = make([]byte, BlockSize)
+	}
+
+	nbChunks := len(d.data) / BlockSize
+
+	for i := 0; i < nbChunks; i++ {
+		copy(buffer[:], d.data[i*BlockSize:(i+1)*BlockSize])
+		x.SetBytes(buffer[:])
+		d.encrypt(x)
+		d.h.Add(&x, &d.h)
+	}
+
+	return d.h
+}
+
+// plain execution of a mimc run
+// m: message
+// k: encryption key
+func (d *digest) encrypt(m fr.Element) {
+
+	for i := 0; i < len(d.Params); i++ {
+		// m = (m+k+c)^5
+		var tmp fr.Element
+		tmp.Add(&m, &d.h).Add(&tmp, &d.Params[i])
+		m.Square(&tmp).
+			Square(&m).
+ Mul(&m, &tmp) + } + m.Add(&m, &d.h) + d.h = m +} + +// Sum computes the mimc hash of msg from seed +func Sum(seed string, msg []byte) ([]byte, error) { + params := NewParams(seed) + var d digest + d.Params = params + if _, err := d.Write(msg); err != nil { + return nil, err + } + h := d.checksum() + bytes := h.Bytes() + return bytes[:], nil +} diff --git a/ecc/bw6-756/fr/permutation/doc.go b/ecc/bw6-756/fr/permutation/doc.go new file mode 100644 index 000000000..bdf98e6ca --- /dev/null +++ b/ecc/bw6-756/fr/permutation/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package permutation provides an API to build permutation proofs. +package permutation diff --git a/ecc/bw6-756/fr/permutation/permutation.go b/ecc/bw6-756/fr/permutation/permutation.go new file mode 100644 index 000000000..8deb3563b --- /dev/null +++ b/ecc/bw6-756/fr/permutation/permutation.go @@ -0,0 +1,361 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package permutation + +import ( + "crypto/sha256" + "errors" + "math/big" + "math/bits" + + "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrIncompatibleSize = errors.New("t1 and t2 should be of the same size") + ErrSize = errors.New("t1 and t2 should be of size a power of 2") + ErrPermutationProof = errors.New("permutation proof verification failed") +) + +// Proof proof that the commitments of t1 and t2 come from +// the same vector but permuted. +type Proof struct { + + // size of the polynomials + size int + + // commitments of t1 & t2, the permuted vectors, and z, the accumulation + // polynomial + t1, t2, z kzg.Digest + + // commitment to the quotient polynomial + q kzg.Digest + + // opening proofs of t1, t2, z, q (in that order) + batchedProof kzg.BatchOpeningProof + + // shifted opening proof of z + shiftedProof kzg.OpeningProof +} + +// computeZ returns the accumulation polynomial in Lagrange basis. 
+func computeZ(lt1, lt2 []fr.Element, epsilon fr.Element) []fr.Element {
+
+	s := len(lt1)
+	z := make([]fr.Element, s)
+	d := make([]fr.Element, s)
+	z[0].SetOne()
+	d[0].SetOne()
+	nn := uint64(64 - bits.TrailingZeros64(uint64(s)))
+	var t fr.Element
+	for i := 0; i < s-1; i++ {
+		_i := int(bits.Reverse64(uint64(i)) >> nn)
+		_ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn)
+		z[_ii].Mul(&z[_i], t.Sub(&epsilon, &lt1[i]))
+		d[i+1].Mul(&d[i], t.Sub(&epsilon, &lt2[i]))
+	}
+	d = fr.BatchInvert(d)
+	for i := 0; i < s-1; i++ {
+		_ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn)
+		z[_ii].Mul(&z[_ii], &d[i+1])
+	}
+
+	return z
+}
+
+// computeH computes lt2*z(gx) - lt1*z
+func computeH(lt1, lt2, lz []fr.Element, epsilon fr.Element) []fr.Element {
+
+	s := len(lt1)
+	res := make([]fr.Element, s)
+	var a, b fr.Element
+	nn := uint64(64 - bits.TrailingZeros64(uint64(s)))
+	for i := 0; i < s; i++ {
+		_i := int(bits.Reverse64(uint64(i)) >> nn)
+		_ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn)
+		a.Sub(&epsilon, &lt2[_i])
+		a.Mul(&lz[_ii], &a)
+		b.Sub(&epsilon, &lt1[_i])
+		b.Mul(&lz[_i], &b)
+		res[_i].Sub(&a, &b)
+	}
+	return res
+}
+
+// computeH0 computes L0 * (z-1)
+func computeH0(lz []fr.Element, d *fft.Domain) []fr.Element {
+
+	var tn, o, g fr.Element
+	s := len(lz)
+	tn.SetUint64(2).
+		Neg(&tn)
+	u := make([]fr.Element, s)
+	o.SetOne()
+	g.Set(&d.FinerGenerator)
+	for i := 0; i < s; i++ {
+		u[i].Sub(&g, &o)
+		g.Mul(&g, &d.Generator)
+	}
+	u = fr.BatchInvert(u)
+	res := make([]fr.Element, s)
+	nn := uint64(64 - bits.TrailingZeros64(uint64(s)))
+	for i := 0; i < s; i++ {
+		_i := int(bits.Reverse64(uint64(i)) >> nn)
+		res[_i].Sub(&lz[_i], &o).
+			Mul(&res[_i], &u[i]).
+			Mul(&res[_i], &tn)
+	}
+	return res
+}
+
+// Prove generates a proof that t1 and t2 are the same but permuted.
+// The size of t1 and t2 should be the same and a power of 2.
+func Prove(srs *kzg.SRS, t1, t2 []fr.Element) (Proof, error) { + + // res + var proof Proof + var err error + + // size checking + if len(t1) != len(t2) { + return proof, ErrIncompatibleSize + } + + // create the domains + d := fft.NewDomain(uint64(len(t1)), 1, false) + if d.Cardinality != uint64(len(t1)) { + return proof, ErrSize + } + s := int(d.Cardinality) + proof.size = s + + // hash function for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta") + + // commit t1, t2 + ct1 := make([]fr.Element, s) + ct2 := make([]fr.Element, s) + copy(ct1, t1) + copy(ct2, t2) + d.FFTInverse(ct1, fft.DIF, 0) + d.FFTInverse(ct2, fft.DIF, 0) + fft.BitReverse(ct1) + fft.BitReverse(ct2) + proof.t1, err = kzg.Commit(ct1, srs) + if err != nil { + return proof, err + } + proof.t2, err = kzg.Commit(ct2, srs) + if err != nil { + return proof, err + } + + // derive challenge for z + epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2) + if err != nil { + return proof, err + } + + // compute Z and commit it + cz := computeZ(t1, t2, epsilon) + d.FFTInverse(cz, fft.DIT, 0) + proof.z, err = kzg.Commit(cz, srs) + if err != nil { + return proof, err + } + lz := make([]fr.Element, s) + copy(lz, cz) + d.FFT(lz, fft.DIF, 1) + + // compute the first part of the numerator + lt1 := make([]fr.Element, s) + lt2 := make([]fr.Element, s) + copy(lt1, ct1) + copy(lt2, ct2) + d.FFT(lt1, fft.DIF, 1) + d.FFT(lt2, fft.DIF, 1) + h := computeH(lt1, lt2, lz, epsilon) + + // compute second part of the numerator + h0 := computeH0(lz, d) + + // derive challenge used for the folding + omega, err := deriveRandomness(&fs, "omega", &proof.z) + if err != nil { + return proof, err + } + + // fold the numerator and divide it by x^n-1 + var t fr.Element + t.SetUint64(2).Neg(&t).Inverse(&t) + for i := 0; i < s; i++ { + h0[i].Mul(&omega, &h0[i]). + Add(&h0[i], &h[i]). 
+ Mul(&h0[i], &t) + } + + // get the quotient and commit it + d.FFTInverse(h0, fft.DIT, 1) + proof.q, err = kzg.Commit(h0, srs) + if err != nil { + return proof, err + } + + // derive the evaluation challenge + eta, err := deriveRandomness(&fs, "eta", &proof.q) + if err != nil { + return proof, err + } + + // compute the opening proofs + proof.batchedProof, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ct1, + ct2, + cz, + h0, + }, + []kzg.Digest{ + proof.t1, + proof.t2, + proof.z, + proof.q, + }, + &eta, + hFunc, + d, + srs, + ) + if err != nil { + return proof, err + } + + eta.Mul(&eta, &d.Generator) + proof.shiftedProof, err = kzg.Open( + cz, + &eta, + d, + srs, + ) + if err != nil { + return proof, err + } + + // done + return proof, nil + +} + +// TODO put that in fiat-shamir package +func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bw6756.G1Affine) (fr.Element, error) { + + var buf [bw6756.SizeOfG1AffineUncompressed]byte + var r fr.Element + + for _, p := range points { + buf = p.RawBytes() + if err := fs.Bind(challenge, buf[:]); err != nil { + return r, err + } + } + + b, err := fs.ComputeChallenge(challenge) + if err != nil { + return r, err + } + r.SetBytes(b) + return r, nil +} + +// Verify verifies a permutation proof. +func Verify(srs *kzg.SRS, proof Proof) error { + + // hash function that is used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta") + + // derive the challenges + epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2) + if err != nil { + return err + } + + omega, err := deriveRandomness(&fs, "omega", &proof.z) + if err != nil { + return err + } + + eta, err := deriveRandomness(&fs, "eta", &proof.q) + if err != nil { + return err + } + + // check the relation + bs := big.NewInt(int64(proof.size)) + var l0, a, b, one, rhs, lhs fr.Element + one.SetOne() + rhs.Exp(eta, bs). 
+ Sub(&rhs, &one) + a.Sub(&eta, &one) + l0.Div(&rhs, &a) + rhs.Mul(&rhs, &proof.batchedProof.ClaimedValues[3]) + a.Sub(&epsilon, &proof.batchedProof.ClaimedValues[1]). + Mul(&a, &proof.shiftedProof.ClaimedValue) + b.Sub(&epsilon, &proof.batchedProof.ClaimedValues[0]). + Mul(&b, &proof.batchedProof.ClaimedValues[2]) + lhs.Sub(&a, &b) + a.Sub(&proof.batchedProof.ClaimedValues[2], &one). + Mul(&a, &l0). + Mul(&a, &omega) + lhs.Add(&a, &lhs) + if !lhs.Equal(&rhs) { + return ErrPermutationProof + } + + // check the opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.t1, + proof.t2, + proof.z, + proof.q, + }, + &proof.batchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = kzg.Verify(&proof.z, &proof.shiftedProof, srs) + if err != nil { + return err + } + + return nil +} diff --git a/ecc/bw6-756/fr/permutation/permutation_test.go b/ecc/bw6-756/fr/permutation/permutation_test.go new file mode 100644 index 000000000..9f56b94c6 --- /dev/null +++ b/ecc/bw6-756/fr/permutation/permutation_test.go @@ -0,0 +1,94 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package permutation + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" +) + +func TestProof(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + a := make([]fr.Element, 8) + b := make([]fr.Element, 8) + + for i := 0; i < 8; i++ { + a[i].SetUint64(uint64(4*i + 1)) + } + for i := 0; i < 8; i++ { + b[i].Set(&a[(5*i)%8]) + } + + // correct proof + { + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + a[0].SetRandom() + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkProver(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := make([]fr.Element, polySize) + c := make([]fr.Element, polySize) + + for i := 0; i < polySize; i++ { + a[i].SetUint64(uint64(i)) + } + for i := 0; i < polySize; i++ { + c[i].Set(&a[(5*i)%(polySize)]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Prove(srs, a, c) + } + +} diff --git a/ecc/bw6-756/fr/plookup/doc.go b/ecc/bw6-756/fr/plookup/doc.go new file mode 100644 index 000000000..ec4b91287 --- /dev/null +++ b/ecc/bw6-756/fr/plookup/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package plookup provides an API to build plookup proofs. +package plookup diff --git a/ecc/bw6-756/fr/plookup/plookup_test.go b/ecc/bw6-756/fr/plookup/plookup_test.go new file mode 100644 index 000000000..5f8ed24ff --- /dev/null +++ b/ecc/bw6-756/fr/plookup/plookup_test.go @@ -0,0 +1,139 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" +) + +func TestLookupVector(t *testing.T) { + + lookupVector := make(Table, 8) + fvector := make(Table, 7) + for i := 0; i < 8; i++ { + lookupVector[i].SetUint64(uint64(2 * i)) + } + for i := 0; i < 7; i++ { + fvector[i].Set(&lookupVector[(4*i+1)%8]) + } + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + // correct proof vector + { + proof, err := ProveLookupVector(srs, fvector, lookupVector) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupVector(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proofs vector + { + fvector[0].SetRandom() + + proof, err := ProveLookupVector(srs, fvector, lookupVector) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupVector(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func TestLookupTable(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + lookupTable := make([]Table, 3) + fTable := make([]Table, 3) + for i := 0; i < 3; i++ { + lookupTable[i] = make(Table, 8) + fTable[i] = make(Table, 7) + for j := 0; j < 8; j++ { + lookupTable[i][j].SetUint64(uint64(2*i + j)) + } + for j := 0; j < 7; j++ { + fTable[i][j].Set(&lookupTable[i][(4*j+1)%8]) + } + } + + // correct proof + { + proof, err := ProveLookupTables(srs, fTable, lookupTable) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupTables(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + fTable[0][0].SetRandom() + proof, err := ProveLookupTables(srs, fTable, lookupTable) + if err != nil { + t.Fatal(err) + } + + err = VerifyLookupTables(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkPlookup(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := 
make(Table, polySize) + c := make(Table, polySize) + + for i := 0; i < 1<<14; i++ { + a[i].SetUint64(uint64(i)) + c[i].SetUint64(uint64((8 * i) % polySize)) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ProveLookupVector(srs, a, c) + } +} diff --git a/ecc/bw6-756/fr/plookup/table.go b/ecc/bw6-756/fr/plookup/table.go new file mode 100644 index 000000000..7bbca097c --- /dev/null +++ b/ecc/bw6-756/fr/plookup/table.go @@ -0,0 +1,252 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "sort" + + bw6756 "github.com/consensys/gnark-crypto/ecc/bw6-756" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/permutation" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrIncompatibleSize = errors.New("the tables in f and t are not of the same size") + ErrFoldedCommitment = errors.New("the folded commitment is malformed") + ErrNumberDigests = errors.New("proof.ts and proof.fs are not of the same length") +) + +// ProofLookupTables proofs that a list of tables +type ProofLookupTables struct { + + // commitments to the rows f + fs []kzg.Digest + + // commitments to the rows of t + ts []kzg.Digest + + // lookup proof for the f and t folded + foldedProof ProofLookupVector + + // proof that the ts folded correspond to t in the folded proof + permutationProof permutation.Proof +} + +// ProveLookupTables generates a proof that f, seen as a multi dimensional table, +// consists of vectors that are in t. In other words for each i, f[:][i] must be one +// of the t[:][j]. +// +// For instance, if t is the truth table of the XOR function, t will be populated such +// that t[:][i] contains the i-th entry of the truth table, so t[0][i] XOR t[1][i] = t[2][i]. +// +// The Table in f and t are supposed to be of the same size constant size. 
+func ProveLookupTables(srs *kzg.SRS, f, t []Table) (ProofLookupTables, error) { + + // res + proof := ProofLookupTables{} + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check the sizes + if len(f) != len(t) { + return proof, ErrIncompatibleSize + } + s := len(f[0]) + for i := 1; i < len(f); i++ { + if len(f[i]) != s { + return proof, ErrIncompatibleSize + } + } + s = len(t[0]) + for i := 1; i < len(t); i++ { + if len(t[i]) != s { + return proof, ErrIncompatibleSize + } + } + + // commit to the tables in f and t + nbRows := len(t) + proof.fs = make([]kzg.Digest, nbRows) + proof.ts = make([]kzg.Digest, nbRows) + _nbColumns := len(f[0]) + 1 + if _nbColumns < len(t[0]) { + _nbColumns = len(t[0]) + } + d := fft.NewDomain(uint64(_nbColumns), 0, false) + nbColumns := d.Cardinality + lfs := make([][]fr.Element, nbRows) + cfs := make([][]fr.Element, nbRows) + lts := make([][]fr.Element, nbRows) + cts := make([][]fr.Element, nbRows) + + for i := 0; i < nbRows; i++ { + + cfs[i] = make([]fr.Element, nbColumns) + lfs[i] = make([]fr.Element, nbColumns) + copy(cfs[i], f[i]) + copy(lfs[i], f[i]) + for j := len(f[i]); j < int(nbColumns); j++ { + cfs[i][j] = f[i][len(f[i])-1] + lfs[i][j] = f[i][len(f[i])-1] + } + d.FFTInverse(cfs[i], fft.DIF, 0) + fft.BitReverse(cfs[i]) + proof.fs[i], err = kzg.Commit(cfs[i], srs) + if err != nil { + return proof, err + } + + cts[i] = make([]fr.Element, nbColumns) + lts[i] = make([]fr.Element, nbColumns) + copy(cts[i], t[i]) + copy(lts[i], t[i]) + for j := len(t[i]); j < int(d.Cardinality); j++ { + cts[i][j] = t[i][len(t[i])-1] + lts[i][j] = t[i][len(t[i])-1] + } + d.FFTInverse(cts[i], fft.DIF, 0) + fft.BitReverse(cts[i]) + proof.ts[i], err = kzg.Commit(cts[i], srs) + if err != nil { + return proof, err + } + } + + // fold f and t + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = 
new(kzg.Digest)
+		comms[i].Set(&proof.fs[i])
+		comms[nbRows+i] = new(kzg.Digest)
+		comms[nbRows+i].Set(&proof.ts[i])
+	}
+	lambda, err := deriveRandomness(&fs, "lambda", comms...)
+	if err != nil {
+		return proof, err
+	}
+	foldedf := make(Table, nbColumns)
+	foldedt := make(Table, nbColumns)
+	for i := 0; i < int(nbColumns); i++ {
+		for j := nbRows - 1; j >= 0; j-- {
+			foldedf[i].Mul(&foldedf[i], &lambda).
+				Add(&foldedf[i], &lfs[j][i])
+			foldedt[i].Mul(&foldedt[i], &lambda).
+				Add(&foldedt[i], &lts[j][i])
+		}
+	}
+
+	// generate a proof of permutation of the foldedt and sort(foldedt)
+	foldedtSorted := make(Table, nbColumns)
+	copy(foldedtSorted, foldedt)
+	sort.Sort(foldedtSorted)
+	proof.permutationProof, err = permutation.Prove(srs, foldedt, foldedtSorted)
+	if err != nil {
+		return proof, err
+	}
+
+	// call plookupVector, on foldedf[:len(foldedf)-1] to ensure that the domain size
+	// in ProveLookupVector is the same as d's
+	proof.foldedProof, err = ProveLookupVector(srs, foldedf[:len(foldedf)-1], foldedt)
+
+	return proof, err
+}
+
+// VerifyLookupTables verifies that a ProofLookupTables proof is correct.
+func VerifyLookupTables(srs *kzg.SRS, proof ProofLookupTables) error {
+
+	// hash function used for Fiat Shamir
+	hFunc := sha256.New()
+
+	// transcript to derive the challenge
+	fs := fiatshamir.NewTranscript(hFunc, "lambda")
+
+	// check that the number of digests is the same
+	if len(proof.fs) != len(proof.ts) {
+		return ErrNumberDigests
+	}
+
+	// fold the commitments fs and ts
+	nbRows := len(proof.fs)
+	comms := make([]*kzg.Digest, 2*nbRows)
+	for i := 0; i < nbRows; i++ {
+		comms[i] = &proof.fs[i]
+		comms[i+nbRows] = &proof.ts[i]
+	}
+	lambda, err := deriveRandomness(&fs, "lambda", comms...)
+ if err != nil { + return err + } + + // fold the commitments of the rows of t and f + var comf, comt kzg.Digest + comf.Set(&proof.fs[nbRows-1]) + comt.Set(&proof.ts[nbRows-1]) + var blambda big.Int + lambda.ToBigIntRegular(&blambda) + for i := nbRows - 2; i >= 0; i-- { + comf.ScalarMultiplication(&comf, &blambda). + Add(&comf, &proof.fs[i]) + comt.ScalarMultiplication(&comt, &blambda). + Add(&comt, &proof.ts[i]) + } + + // check that the folded commitment of the fs correspond to foldedProof.f + if !comf.Equal(&proof.foldedProof.f) { + return ErrFoldedCommitment + } + + // check that the folded commitment of the ts is a permutation of proof.FoldedProof.t + err = permutation.Verify(srs, proof.permutationProof) + if err != nil { + return err + } + + // verify the inner proof + return VerifyLookupVector(srs, proof.foldedProof) +} + +// TODO put that in fiat-shamir package +func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bw6756.G1Affine) (fr.Element, error) { + + var buf [bw6756.SizeOfG1AffineUncompressed]byte + var r fr.Element + + for _, p := range points { + buf = p.RawBytes() + if err := fs.Bind(challenge, buf[:]); err != nil { + return r, err + } + } + + b, err := fs.ComputeChallenge(challenge) + if err != nil { + return r, err + } + r.SetBytes(b) + return r, nil +} diff --git a/ecc/bw6-756/fr/plookup/vector.go b/ecc/bw6-756/fr/plookup/vector.go new file mode 100644 index 000000000..4d64b9b49 --- /dev/null +++ b/ecc/bw6-756/fr/plookup/vector.go @@ -0,0 +1,687 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "math/bits" + "sort" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/polynomial" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrNotInTable = errors.New("some value in the vector is not in the lookup table") + ErrPlookupVerification = errors.New("plookup verification failed") +) + +type Table []fr.Element + +// Len is the number of elements in the collection. +func (t Table) Len() int { + return len(t) +} + +// Less reports whether the element with +// index i should sort before the element with index j. +func (t Table) Less(i, j int) bool { + return t[i].Cmp(&t[j]) == -1 +} + +// Swap swaps the elements with indexes i and j. +func (t Table) Swap(i, j int) { + t[i], t[j] = t[j], t[i] +} + +// Proof Plookup proof, containing opening proofs +type ProofLookupVector struct { + + // size of the system + size uint64 + + // Commitments to h1, h2, t, z, f, h + h1, h2, t, z, f, h kzg.Digest + + // Batch opening proof of h1, h2, z, t + BatchedProof kzg.BatchOpeningProof + + // Batch opening proof of h1, h2, z shifted by g + BatchedProofShifted kzg.BatchOpeningProof +} + +// computeZ computes Z, in Lagrange basis. 
Z is the accumulation of the partial
+// ratios of 2 fully split polynomials (cf https://eprint.iacr.org/2020/315.pdf)
+// * lf is the list of values that should be in lt
+// * lt is the lookup table
+// * lh1, lh2 is lf sorted by lt split in 2 overlapping slices
+// * beta, gamma are challenges (Schwartz-zippel: they are the random evaluations point)
+func computeZ(lf, lt, lh1, lh2 []fr.Element, beta, gamma fr.Element) []fr.Element {
+
+	z := make([]fr.Element, len(lt))
+
+	n := len(lt)
+	d := make([]fr.Element, n-1)
+	var u, c fr.Element
+	c.SetOne().
+		Add(&c, &beta).
+		Mul(&c, &gamma)
+	for i := 0; i < n-1; i++ {
+
+		d[i].Mul(&beta, &lh1[i+1]).
+			Add(&d[i], &lh1[i]).
+			Add(&d[i], &c)
+
+		u.Mul(&beta, &lh2[i+1]).
+			Add(&u, &lh2[i]).
+			Add(&u, &c)
+
+		d[i].Mul(&d[i], &u)
+	}
+	d = fr.BatchInvert(d)
+
+	z[0].SetOne()
+	var a, b, e fr.Element
+	e.SetOne().Add(&e, &beta)
+	for i := 0; i < n-1; i++ {
+
+		a.Add(&gamma, &lf[i])
+
+		b.Mul(&beta, &lt[i+1]).
+			Add(&b, &lt[i]).
+			Add(&b, &c)
+
+		a.Mul(&a, &b).
+			Mul(&a, &e)
+
+		z[i+1].Mul(&z[i], &a).
+			Mul(&z[i+1], &d[i])
+	}
+
+	return z
+}
+
+// computeH computes the evaluation (shifted, bit reversed) of h where
+// h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) -
+// (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) )
+//
+// * cz, ch1, ch2, ct, cf are the polynomials z, h1, h2, t, f in canonical basis
+// * _lz, _lh1, _lh2, _lt, _lf are the polynomials z, h1, h2, t, f in shifted Lagrange basis (domainH)
+// * beta, gamma are the challenges
+// * it returns h in canonical basis
+func computeH(_lz, _lh1, _lh2, _lt, _lf []fr.Element, beta, gamma fr.Element, domainH *fft.Domain) []fr.Element {
+
+	// result
+	s := int(domainH.Cardinality)
+	num := make([]fr.Element, domainH.Cardinality)
+
+	var u, v, w, _g, m, n, one, t fr.Element
+	t.SetUint64(2).
+		Inverse(&t)
+	_g.Square(&domainH.Generator).
+ Exp(_g, big.NewInt(int64(s/2-1))) + one.SetOne() + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + g := make([]fr.Element, s) + g[0].Set(&domainH.FinerGenerator) + for i := 1; i < s; i++ { + g[i].Mul(&g[i-1], &domainH.Generator) + } + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + // m = (x-g**(n-1))*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) + m.Mul(&v, &_lz[_i]) + u.Add(&gamma, &_lf[_i]) + m.Mul(&m, &u) + u.Mul(&beta, &_lt[_is]). + Add(&u, &_lt[_i]). + Add(&u, &w) + m.Mul(&m, &u) + + // n = (x-g**(n-1))*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) + n.Mul(&beta, &_lh1[_is]). + Add(&n, &_lh1[_i]). + Add(&n, &w) + u.Mul(&beta, &_lh2[_is]). + Add(&u, &_lh2[_i]). + Add(&u, &w) + n.Mul(&n, &u). + Mul(&n, &_lz[_is]) + + num[_i].Sub(&m, &n) + u.Sub(&g[i], &_g) + num[_i].Mul(&num[_i], &u) + + } + + return num +} + +// computeH0 returns l0 * (z-1), in Lagrange basis and bit reversed order +func computeH0(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var d fr.Element + d.Set(&domainH.FinerGenerator) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(den); i++ { + den[i].Sub(&d, &one) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). 
+ Mul(&res[_i], &g[i%2]).Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHn returns ln * (z-1), in Lagrange basis and bit reversed order +func computeHn(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + one.SetOne() + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(lzCosetReversed); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). + Mul(&res[_i], &g[i%2]). 
+ Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHh1h2 returns ln * (h1 - h2(g.x)), in Lagrange basis and bit reversed order +func computeHh1h2(_lh1, _lh2 []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(_lh1)) + for i := 0; i < len(_lh1); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(_lh1)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + s := len(_lh1) + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + res[_i].Sub(&_lh1[_i], &_lh2[_is]). + Mul(&res[_i], &g[i%2]). + Mul(&res[_i], &den[i]) + } + + return res +} + +// computeQuotient computes the full quotient of the plookup protocol. +// * alpha is the challenge to fold the numerator +// * lh, lh0, lhn, lh1h2 are the various pieces of the numerator (Lagrange shifted form, bit reversed order) +// * domainH fft domain +// It returns the quotient, in canonical basis +func computeQuotient(alpha fr.Element, lh, lh0, lhn, lh1h2 []fr.Element, domainH *fft.Domain) []fr.Element { + + s := len(lh) + res := make([]fr.Element, s) + + var one fr.Element + one.SetOne() + + var d [2]fr.Element + d[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality>>1))) + d[1].Neg(&d[0]) + d[0].Sub(&d[0], &one).Inverse(&d[0]) + d[1].Sub(&d[1], &one).Inverse(&d[1]) + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + + res[_i].Mul(&lh1h2[_i], &alpha). + Add(&res[_i], &lhn[_i]). 
+ Mul(&res[_i], &alpha). + Add(&res[_i], &lh0[_i]). + Mul(&res[_i], &alpha). + Add(&res[_i], &lh[_i]). + Mul(&res[_i], &d[i%2]) + } + + domainH.FFTInverse(res, fft.DIT, 1) + + return res +} + +// ProveLookupVector returns proof that the values in f are in t. +// +// /!\IMPORTANT/!\ +// +// If the table t is already commited somewhere (which is the normal workflow +// before generating a lookup proof), the commitment needs to be done on the +// table sorted. Otherwise the commitment in proof.t will not be the same as +// the public commitment: it will contain the same values, but permuted. +// +func ProveLookupVector(srs *kzg.SRS, f, t Table) (ProofLookupVector, error) { + + // res + var proof ProofLookupVector + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // create domains + var dNum *fft.Domain + if len(t) <= len(f) { + dNum = fft.NewDomain(uint64(len(f)+1), 0, false) + } else { + dNum = fft.NewDomain(uint64(len(t)), 0, false) + } + cardDNum := int(dNum.Cardinality) + + // set the size + proof.size = dNum.Cardinality + + // resize f and t + // note: the last element of lf does not matter + lf := make([]fr.Element, cardDNum) + lt := make([]fr.Element, cardDNum) + cf := make([]fr.Element, cardDNum) + ct := make([]fr.Element, cardDNum) + copy(lt, t) + copy(lf, f) + for i := len(f); i < cardDNum; i++ { + lf[i] = f[len(f)-1] + } + for i := len(t); i < cardDNum; i++ { + lt[i] = t[len(t)-1] + } + sort.Sort(Table(lt)) + copy(ct, lt) + copy(cf, lf) + dNum.FFTInverse(ct, fft.DIF, 0) + dNum.FFTInverse(cf, fft.DIF, 0) + fft.BitReverse(ct) + fft.BitReverse(cf) + proof.t, err = kzg.Commit(ct, srs) + if err != nil { + return proof, err + } + proof.f, err = kzg.Commit(cf, srs) + if err != nil { + return proof, err + } + + // write f sorted by t + lfSortedByt := make(Table, 2*dNum.Cardinality-1) + copy(lfSortedByt, lt) + 
copy(lfSortedByt[dNum.Cardinality:], lf) + sort.Sort(lfSortedByt) + + // compute h1, h2, commit to them + lh1 := make([]fr.Element, cardDNum) + lh2 := make([]fr.Element, cardDNum) + ch1 := make([]fr.Element, cardDNum) + ch2 := make([]fr.Element, cardDNum) + copy(lh1, lfSortedByt[:cardDNum]) + copy(lh2, lfSortedByt[cardDNum-1:]) + + copy(ch1, lfSortedByt[:cardDNum]) + copy(ch2, lfSortedByt[cardDNum-1:]) + dNum.FFTInverse(ch1, fft.DIF, 0) + dNum.FFTInverse(ch2, fft.DIF, 0) + fft.BitReverse(ch1) + fft.BitReverse(ch2) + + proof.h1, err = kzg.Commit(ch1, srs) + if err != nil { + return proof, err + } + proof.h2, err = kzg.Commit(ch2, srs) + if err != nil { + return proof, err + } + + // derive beta, gamma + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return proof, err + } + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return proof, err + } + + // Compute to Z + lz := computeZ(lf, lt, lh1, lh2, beta, gamma) + cz := make([]fr.Element, len(lz)) + copy(cz, lz) + dNum.FFTInverse(cz, fft.DIF, 0) + fft.BitReverse(cz) + proof.z, err = kzg.Commit(cz, srs) + if err != nil { + return proof, err + } + + // prepare data for computing the quotient + // compute the numerator + s := dNum.Cardinality + domainH := fft.NewDomain(uint64(2*s), 1, false) + _lz := make([]fr.Element, 2*s) + _lh1 := make([]fr.Element, 2*s) + _lh2 := make([]fr.Element, 2*s) + _lt := make([]fr.Element, 2*s) + _lf := make([]fr.Element, 2*s) + copy(_lz, cz) + copy(_lh1, ch1) + copy(_lh2, ch2) + copy(_lt, ct) + copy(_lf, cf) + domainH.FFT(_lz, fft.DIF, 1) + domainH.FFT(_lh1, fft.DIF, 1) + domainH.FFT(_lh2, fft.DIF, 1) + domainH.FFT(_lt, fft.DIF, 1) + domainH.FFT(_lf, fft.DIF, 1) + + // compute h + lh := computeH(_lz, _lh1, _lh2, _lt, _lf, beta, gamma, domainH) + + // compute h0 + lh0 := computeH0(_lz, domainH) + + // compute hn + lhn := computeHn(_lz, domainH) + + // compute hh1h2 + lh1h2 := computeHh1h2(_lh1, _lh2, domainH) + + // 
compute the quotient + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return proof, err + } + ch := computeQuotient(alpha, lh, lh0, lhn, lh1h2, domainH) + proof.h, err = kzg.Commit(ch, srs) + if err != nil { + return proof, err + } + + // build the opening proofs + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return proof, err + } + proof.BatchedProof, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + cf, + ch, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + nu.Mul(&nu, &dNum.Generator) + proof.BatchedProofShifted, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + return proof, nil +} + +// VerifyLookupVector verifies that a ProofLookupVector proof is correct +func VerifyLookupVector(srs *kzg.SRS, proof ProofLookupVector) error { + + // hash function that is used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // derive the various challenges + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return err + } + + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return err + } + + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return err + } + + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return err + } + + // check opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &proof.BatchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = 
kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &proof.BatchedProofShifted, + hFunc, + srs, + ) + if err != nil { + return err + } + + // check polynomial relation using Schwartz Zippel + var lhs, rhs, nun, g, _g, a, v, w, one fr.Element + d := fft.NewDomain(proof.size, 0, false) // only there to access to root of 1... + one.SetOne() + g.Exp(d.Generator, big.NewInt(int64(d.Cardinality-1))) + + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + // h(nu) where + // h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) - + // (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) ) + lhs.Sub(&nu, &g). + Mul(&lhs, &proof.BatchedProof.ClaimedValues[3]). + Mul(&lhs, &v) + a.Add(&gamma, &proof.BatchedProof.ClaimedValues[4]) + lhs.Mul(&lhs, &a) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[2]). + Add(&a, &proof.BatchedProof.ClaimedValues[2]). + Add(&a, &w) + lhs.Mul(&lhs, &a) + + rhs.Sub(&nu, &g). + Mul(&rhs, &proof.BatchedProofShifted.ClaimedValues[3]) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[0]). + Add(&a, &proof.BatchedProof.ClaimedValues[0]). + Add(&a, &w) + rhs.Mul(&rhs, &a) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[1]). + Add(&a, &proof.BatchedProof.ClaimedValues[1]). + Add(&a, &w) + rhs.Mul(&rhs, &a) + + lhs.Sub(&lhs, &rhs) + + // check consistancy of bounds + var l0, ln, d1, d2 fr.Element + l0.Exp(nu, big.NewInt(int64(d.Cardinality))).Sub(&l0, &one) + ln.Set(&l0) + d1.Sub(&nu, &one) + d2.Sub(&nu, &g) + l0.Div(&l0, &d1) + ln.Div(&ln, &d2) + + // l0*(z-1) + var l0z fr.Element + l0z.Sub(&proof.BatchedProof.ClaimedValues[3], &one). + Mul(&l0z, &l0) + + // ln*(z-1) + var lnz fr.Element + lnz.Sub(&proof.BatchedProof.ClaimedValues[3], &one). + Mul(&ln, &lnz) + + // ln*(h1 - h2(g.x)) + var lnh1h2 fr.Element + lnh1h2.Sub(&proof.BatchedProof.ClaimedValues[0], &proof.BatchedProofShifted.ClaimedValues[1]). 
+ Mul(&lnh1h2, &ln) + + // fold the numerator + lnh1h2.Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lnz). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &l0z). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lhs) + + // (x**n-1) * h(x) evaluated at nu + nun.Exp(nu, big.NewInt(int64(d.Cardinality))) + _g.Sub(&nun, &one) + _g.Mul(&proof.BatchedProof.ClaimedValues[5], &_g) + if !lnh1h2.Equal(&_g) { + return ErrPlookupVerification + } + + return nil +} diff --git a/ecc/bw6-756/fr/polynomial/doc.go b/ecc/bw6-756/fr/polynomial/doc.go new file mode 100644 index 000000000..83479b058 --- /dev/null +++ b/ecc/bw6-756/fr/polynomial/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package polynomial provides polynomial methods and commitment schemes. +package polynomial diff --git a/ecc/bw6-756/fr/polynomial/polynomial.go b/ecc/bw6-756/fr/polynomial/polynomial.go new file mode 100644 index 000000000..7f90c5e66 --- /dev/null +++ b/ecc/bw6-756/fr/polynomial/polynomial.go @@ -0,0 +1,123 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// Polynomial polynomial represented by coefficients bn254 fr field. +type Polynomial []fr.Element + +// Degree returns the degree of the polynomial, which is the length of Data. +func (p *Polynomial) Degree() uint64 { + return uint64(len(*p) - 1) +} + +// Eval evaluates p at v +// returns a fr.Element +func (p *Polynomial) Eval(v *fr.Element) fr.Element { + + res := (*p)[len(*p)-1] + for i := len(*p) - 2; i >= 0; i-- { + res.Mul(&res, v) + res.Add(&res, &(*p)[i]) + } + + return res +} + +// Clone returns a copy of the polynomial +func (p *Polynomial) Clone() Polynomial { + _p := make(Polynomial, len(*p)) + copy(_p, *p) + return _p +} + +// AddConstantInPlace adds a constant to the polynomial, modifying p +func (p *Polynomial) AddConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Add(&(*p)[i], c) + } +} + +// SubConstantInPlace subs a constant to the polynomial, modifying p +func (p *Polynomial) SubConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Sub(&(*p)[i], c) + } +} + +// ScaleInPlace multiplies p by v, modifying p +func (p *Polynomial) ScaleInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Mul(&(*p)[i], c) + } +} + +// Add adds p1 to p2 +// This function allocates a new slice unless p == p1 or p == p2 +func (p *Polynomial) Add(p1, p2 Polynomial) *Polynomial { + + bigger := p1 + smaller := p2 + if len(bigger) < len(smaller) { 
+ bigger, smaller = smaller, bigger + } + + if len(*p) == len(bigger) && (&(*p)[0] == &bigger[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &smaller[i]) + } + return p + } + + if len(*p) == len(smaller) && (&(*p)[0] == &smaller[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &bigger[i]) + } + *p = append(*p, bigger[len(smaller):]...) + return p + } + + res := make(Polynomial, len(bigger)) + copy(res, bigger) + for i := 0; i < len(smaller); i++ { + res[i].Add(&res[i], &smaller[i]) + } + *p = res + return p +} + +// Equal checks equality between two polynomials +func (p *Polynomial) Equal(p1 Polynomial) bool { + if (*p == nil) != (p1 == nil) { + return false + } + + if len(*p) != len(p1) { + return false + } + + for i := range p1 { + if !(*p)[i].Equal(&p1[i]) { + return false + } + } + + return true +} diff --git a/ecc/bw6-756/fr/polynomial/polynomial_test.go b/ecc/bw6-756/fr/polynomial/polynomial_test.go new file mode 100644 index 000000000..9a1298763 --- /dev/null +++ b/ecc/bw6-756/fr/polynomial/polynomial_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +func TestPolynomialEval(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // random value + var point fr.Element + point.SetRandom() + + // compute manually f(val) + var expectedEval, one, den fr.Element + var expo big.Int + one.SetOne() + expo.SetUint64(20) + expectedEval.Exp(point, &expo). + Sub(&expectedEval, &one) + den.Sub(&point, &one) + expectedEval.Div(&expectedEval, &den) + + // compute purported evaluation + purportedEval := f.Eval(&point) + + // check + if !purportedEval.Equal(&expectedEval) { + t.Fatal("polynomial evaluation failed") + } +} + +func TestPolynomialAddConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to add + var c fr.Element + c.SetRandom() + + // add constant + f.AddConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Add(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("AddConstantInPlace failed") + } + } +} + +func TestPolynomialSubConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to sub + var c fr.Element + c.SetRandom() + + // sub constant + f.SubConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Sub(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("SubConstantInPlace failed") + } + } +} + +func TestPolynomialScaleInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to scale by + var c fr.Element + c.SetRandom() + + // scale by constant + 
f.ScaleInPlace(&c) + + // check + for i := 0; i < 20; i++ { + if !f[i].Equal(&c) { + t.Fatal("ScaleInPlace failed") + } + } + +} + +func TestPolynomialAdd(t *testing.T) { + + // build unbalanced polynomials + f1 := make(Polynomial, 20) + f1Backup := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f1[i].SetOne() + f1Backup[i].SetOne() + } + f2 := make(Polynomial, 10) + f2Backup := make(Polynomial, 10) + for i := 0; i < 10; i++ { + f2[i].SetOne() + f2Backup[i].SetOne() + } + + // expected result + var one, two fr.Element + one.SetOne() + two.Double(&one) + expectedSum := make(Polynomial, 20) + for i := 0; i < 10; i++ { + expectedSum[i].Set(&two) + } + for i := 10; i < 20; i++ { + expectedSum[i].Set(&one) + } + + // caller is empty + var g Polynomial + g.Add(f1, f2) + if !g.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // all operands are distincts + _f1 := f1.Clone() + _f1.Add(f1, f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // first operand = caller + _f1 = f1.Clone() + _f2 := f2.Clone() + _f1.Add(_f1, _f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } + + // second operand = caller + _f1 = f1.Clone() + _f2 = f2.Clone() + _f1.Add(_f2, _f1) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } +} diff --git a/ecc/bw6-756/fuzz.go b/ecc/bw6-756/fuzz.go new file mode 100644 index 000000000..f00846392 --- /dev/null +++ b/ecc/bw6-756/fuzz.go @@ 
-0,0 +1,76 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/mimc" + "math/big" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + // TODO separate in multiple FuzzXXX and update continuous fuzzer scripts + // else, we don't really benefits for fuzzer strategy. 
+ fr.Fuzz(data) + fp.Fuzz(data) + mimc.Fuzz(data) + + // fuzz pairing + r := bytes.NewReader(data) + var e1, e2 fr.Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + var r, r1, r2, r1r2, zero GT + var b1, b2, b1b2 big.Int + e1.ToBigIntRegular(&b1) + e2.ToBigIntRegular(&b2) + b1b2.Mul(&b1, &b2) + + var p1 G1Affine + var p2 G2Affine + + p1.ScalarMultiplication(&g1GenAff, &b1) + p2.ScalarMultiplication(&g2GenAff, &b2) + + r, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + r1, _ = Pair([]G1Affine{p1}, []G2Affine{g2GenAff}) + r2, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{p2}) + + r1r2.Exp(&r, b1b2) + r1.Exp(&r1, b2) + r2.Exp(&r2, b1) + + if !(r1r2.Equal(&r1) && r1r2.Equal(&r2) && !r.Equal(&zero)) { + panic("pairing bilinearity check failed") + } + } + + return fuzzNormal +} diff --git a/ecc/bw6-756/fuzz_test.go b/ecc/bw6-756/fuzz_test.go new file mode 100644 index 000000000..583d7dece --- /dev/null +++ b/ecc/bw6-756/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bw6-756/g1.go b/ecc/bw6-756/g1.go new file mode 100644 index 000000000..2a1f0a5bf --- /dev/null +++ b/ecc/bw6-756/g1.go @@ -0,0 +1,1081 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G1Affine point in affine coordinates +type G1Affine struct { + X, Y fp.Element +} + +// G1Jac is a point with fp.Element coordinates +type G1Jac struct { + X, Y, Z fp.Element +} + +// g1JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g1JacExtended struct { + X, Y, ZZ, ZZZ fp.Element +} + +// g1Proj point in projective coordinates +type g1Proj struct { + x, y, z fp.Element +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G1Affine) Set(a *G1Affine) *G1Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G1Affine) ScalarMultiplication(a *G1Affine, s *big.Int) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Add(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Sub(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G1Affine) Equal(a *G1Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G1Affine) Neg(a *G1Affine) *G1Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G1Affine) FromJacobian(p1 *G1Jac) *G1Affine { + + var a, b fp.Element + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G1Affine) String() string { + var x, y fp.Element + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G1Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G1Affine) IsOnCurve() bool { + var point G1Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G1Affine) IsInSubGroup() bool { + var _p G1Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G1Jac) Set(a *G1Jac) *G1Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G1Jac) Equal(a *G1Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G1Affine{} + _p.FromJacobian(p) + + _a := G1Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G1Jac) Neg(a *G1Jac) *G1Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G1Jac) SubAssign(a *G1Jac) *G1Jac { + var tmp G1Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G1Jac) AddAssign(a *G1Jac) *G1Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fp.Element + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G1Jac) AddMixed(a *G1Affine) *G1Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fp.Element + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) Double(q *G1Jac) *G1Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) DoubleAssign() *G1Jac { + + var XX, YY, YYYY, ZZ, S, M, T fp.Element + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) ScalarMultiplication(a *G1Jac, s *big.Int) *G1Jac { + return p.mulGLV(a, s) +} + +func (p *G1Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G1Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G1Jac) FromAffine(Q *G1Affine) *G1Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G1Jac) IsOnCurve() bool { + var left, right, tmp fp.Element + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// IsInSubGroup returns true if p is on the r-torsion, false otherwise. +// Z[r,0]+Z[-lambdaG1Affine, 1] is the kernel +// of (u,v)->u+lambdaG1Affinev mod r. Expressing r, lambdaG1Affine as +// polynomials in x, a short vector of this Zmodule is +// (x+1), (x**3-x**2+1). So we check that (x+1)p+(x**3-x**2+1)*phi(p) +// is the infinity. +func (p *G1Jac) IsInSubGroup() bool { + + var res, phip G1Jac + phip.phi(p) + res.ScalarMultiplication(&phip, &xGen). + SubAssign(&phip). + ScalarMultiplication(&res, &xGen). + ScalarMultiplication(&res, &xGen). 
+ AddAssign(&phip) + + phip.ScalarMultiplication(p, &xGen).AddAssign(p).AddAssign(&res) + + return phip.IsOnCurve() && phip.Z.IsZero() + +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G1Jac) mulWindowed(a *G1Jac, s *big.Int) *G1Jac { + + var res G1Jac + var ops [3]G1Jac + + res.Set(&g1Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G1Jac) phi(a *G1Jac) *G1Jac { + p.Set(a) + p.X.Mul(&p.X, &thirdRootOneG1) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { + + var table [15]G1Jac + var res G1Jac + var k1, k2 fr.Element + + res.Set(&g1Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + 
table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G1Affine) ClearCofactor(a *G1Affine) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in E(Fp) to E(Fp)[r] +func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { + + var L0, L1, uP, u2P, u3P, tmp G1Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + + L0.Set(a).AddAssign(&u3P). + SubAssign(&u2P) + tmp.Set(a).AddAssign(&u2P). + SubAssign(&uP). + SubAssign(&uP). + Double(&tmp) + L0.SubAssign(&tmp). + SubAssign(a) + + L1.Set(a).AddAssign(&uP) + tmp.Set(&uP).SubAssign(a). + Double(&tmp). + SubAssign(&u2P) + L1.AddAssign(&tmp). + SubAssign(a) + + p.phi(&L1). 
+ AddAssign(&L0) + + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g1JacExtended) Set(a *g1JacExtended) *g1JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g1JacExtended) setInfinity() *g1JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G1Affine) fromJacExtended(Q *g1JacExtended) *G1Affine { + if Q.ZZ.IsZero() { + p.X = fp.Element{} + p.Y = fp.Element{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G1Jac) fromJacExtended(Q *g1JacExtended) *G1Jac { + if Q.ZZ.IsZero() { + p.Set(&g1Infinity) + return p + } + p.X.Mul(&Q.ZZ, &Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G1Jac) unsafeFromJacExtended(Q *g1JacExtended) *G1Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g1JacExtended) add(q *g1JacExtended) *g1JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fp.Element + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if B.IsZero() { + return p.double(q) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var U1, U2, 
S1, S2, P, R, PP, PPP, Q, V fp.Element + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) double(q *g1JacExtended) *g1JacExtended { + var U, V, W, S, XX, M fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) subMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + 
return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) addMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g1JacExtended) doubleNegMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) doubleMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// ------------------------------------------------------------------------------------------------- +// Homogenous projective + +// Set sets p to the provided point +func (p *g1Proj) Set(a *g1Proj) *g1Proj { + p.x, p.y, p.z = a.x, a.y, a.z + return p +} + +// Neg computes -G +func (p *g1Proj) Neg(a *g1Proj) *g1Proj { + *p = *a + p.y.Neg(&a.y) + return p +} + +// FromJacobian converts a point from Jacobian to projective coordinates +func (p *g1Proj) FromJacobian(Q *G1Jac) *g1Proj { + var buf fp.Element + buf.Square(&Q.Z) + + p.x.Mul(&Q.X, &Q.Z) + p.y.Set(&Q.Y) + p.z.Mul(&Q.Z, &buf) + + return p +} + +// FromAffine sets p = Q, p in homogenous projective, Q in affine +func (p *g1Proj) FromAffine(Q *G1Affine) *g1Proj { + if Q.X.IsZero() && Q.Y.IsZero() { + p.z.SetZero() + p.x.SetOne() + p.y.SetOne() + return p + } + p.z.SetOne() + p.x.Set(&Q.X) + p.y.Set(&Q.Y) + return p +} + +// BatchProjectiveToAffineG1 converts points in Projective coordinates to Affine coordinates +// performing a single field inversion (Montgomery batch inversion trick) +// result must be allocated with len(result) == len(points) +func BatchProjectiveToAffineG1(points []g1Proj, result []G1Affine) { + zeroes := make([]bool, len(points)) + accumulator := fp.One() + + // batch invert all points[].Z coordinates with Montgomery batch inversion trick + // (stores points[].Z^-1 in result[i].X to avoid allocating a slice of fr.Elements) + for i := 0; i < len(points); i++ { + if points[i].z.IsZero() { + zeroes[i] = true + continue + } + result[i].X = accumulator + accumulator.Mul(&accumulator, &points[i].z) + } + + var accInverse fp.Element + accInverse.Inverse(&accumulator) + + for i := len(points) - 1; i >= 0; i-- { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + result[i].X.Mul(&result[i].X, &accInverse) + accInverse.Mul(&accInverse, &points[i].z) + } + + // batch convert to affine. 
+ parallel.Execute(len(points), func(start, end int) { + for i := start; i < end; i++ { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + a := result[i].X + result[i].X.Mul(&points[i].x, &a) + result[i].Y.Mul(&points[i].y, &a) + } + }) +} + +// BatchJacobianToAffineG1 converts points in Jacobian coordinates to Affine coordinates +// performing a single field inversion (Montgomery batch inversion trick) +// result must be allocated with len(result) == len(points) +func BatchJacobianToAffineG1(points []G1Jac, result []G1Affine) { + zeroes := make([]bool, len(points)) + accumulator := fp.One() + + // batch invert all points[].Z coordinates with Montgomery batch inversion trick + // (stores points[].Z^-1 in result[i].X to avoid allocating a slice of fr.Elements) + for i := 0; i < len(points); i++ { + if points[i].Z.IsZero() { + zeroes[i] = true + continue + } + result[i].X = accumulator + accumulator.Mul(&accumulator, &points[i].Z) + } + + var accInverse fp.Element + accInverse.Inverse(&accumulator) + + for i := len(points) - 1; i >= 0; i-- { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + result[i].X.Mul(&result[i].X, &accInverse) + accInverse.Mul(&accInverse, &points[i].Z) + } + + // batch convert to affine. + parallel.Execute(len(points), func(start, end int) { + for i := start; i < end; i++ { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + var a, b fp.Element + a = result[i].X + b.Square(&a) + result[i].X.Mul(&points[i].X, &b) + result[i].Y.Mul(&points[i].Y, &b). 
+ Mul(&result[i].Y, &a) + } + }) + +} + +// BatchScalarMultiplicationG1 multiplies the same base (generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G1Jac, (1 << (c - 1))) + baseTable[0].Set(&g1Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + // convert our base exp table into affine to 
use AddMixed + baseTableAff := make([]G1Affine, (1 << (c - 1))) + BatchJacobianToAffineG1(baseTable, baseTableAff) + toReturn := make([]G1Jac, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. + parallel.Execute(len(pScalars), func(start, end int) { + var p G1Jac + for i := start; i < end; i++ { + p.Set(&g1Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddMixed(&baseTableAff[bits-1]) + } else { + // sub + t := baseTableAff[bits & ^msbWindow] + t.Neg(&t) + p.AddMixed(&t) + } + } + + // set our result point + toReturn[i] = p + + } + }) + toReturnAff := make([]G1Affine, len(scalars)) + BatchJacobianToAffineG1(toReturn, toReturnAff) + return toReturnAff +} diff --git a/ecc/bw6-756/g1_test.go b/ecc/bw6-756/g1_test.go new file mode 100644 index 000000000..a38dbeb3a --- /dev/null +++ b/ecc/bw6-756/g1_test.go @@ -0,0 +1,664 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG1AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fp.Element) bool { + var p, res1, res2 G1Jac + p = fuzzJacobianG1Affine(&g1Gen, a) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenFp(), + )) + + properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fp.Element) bool { + var p, res, tmp G1Jac + p = fuzzJacobianG1Affine(&g1Gen, a) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Svsw mapping should output point on the curve", prop.ForAll( + func(a fp.Element) bool { + g := MapToCurveG1Svdw(a) + return g.IsInSubGroup() + }, + GenFp(), + )) + + properties.Property("[G1] Svsw mapping should be deterministic", prop.ForAll( + func(a fp.Element) bool { + g1 := MapToCurveG1Svdw(a) + g2 := MapToCurveG1Svdw(a) + return g1.Equal(&g2) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] g1Gen (affine) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G1Affine + op1.FromJacobian(&g1Gen) + op2.FromJacobian(&g1Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenFp(), + )) + + properties.Property("[BW6-756] g1Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2, op3 G1Jac + op1.Set(&g1Gen) + op3.Set(&g1Gen) + + op2 = fuzzJacobianG1Affine(&g1Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + g := 
fuzzJacobianG1Affine(&g1Gen, a) + var op1 G1Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + var g g1JacExtended + g.X.Set(&g1Gen.X) + g.Y.Set(&g1Gen.Y) + g.ZZ.Set(&g1Gen.Z) + g.ZZZ.Set(&g1Gen.Z) + gfuzz := fuzzExtendedJacobianG1Affine(&g, a) + + var op1 G1Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fp.Element) bool { + var g G1Jac + var op1 G1Affine + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + + var one fp.Element + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g1Gen.X) && g.Y.Equal(&g1Gen.Y) && g.Z.Equal(&one) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G1Jac + op1.FromAffine(&g) + var one, zero fp.Element + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G1Affine + var op1 g1JacExtended + var zero fp.Element + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Jac + var op1 g1JacExtended + var zero, one fp.Element + one.SetOne() + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) + return 
g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fp.Element) bool { + op1 := fuzzJacobianG1Affine(&g1Gen, a) + op2 := fuzzJacobianG1Affine(&g1Gen, b) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BW6-756] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + var op1, op2 G1Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.AddAssign(&g1Infinity) + var op2 G1Jac + op2.Set(&g1Infinity) + op2.AddAssign(&g1Gen) + return fop1.Equal(&g1Gen) && op2.Equal(&g1Gen) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + o1.addMixed(&p1Neg) + 
o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.Neg(&fop1) + var op2 G1Affine + op2.FromJacobian(&g1Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + )) + + properties.Property("[BW6-756] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g1Gen, &rminusone) + gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g1Gen, &scalar) + op2.mulWindowed(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.ScalarMultiplication(&g1Gen, &rminusone) + 
gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g1Gen, &scalar) + op2.ScalarMultiplication(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G1Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g1Gen, &r) + op2.mulGLV(&g1Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g1Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fp.Element + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + for x.Legendre() != 1 { + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + } + + b.Sqrt(&x) + var point, pointCleared, infinity G1Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g1Infinity) + return point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG1AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BW6-756] BatchScalarMultiplication should be consistant with individual scalar multiplications", prop.ForAll( + 
func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G1Jac + var expected G1Affine + var b big.Int + expectedJac.mulGLV(&g1Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG1JacIsInSubGroup(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG1AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG1JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G1Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g1Gen, &scalar) + } + }) + + var glv G1Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g1Gen, &scalar) + } + }) + +} + +func BenchmarkG1AffineCofactorClearing(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG1JacAdd(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g1Gen) + } +} + +func BenchmarkG1JacAddMixed(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG1JacDouble(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG1JacExtAddMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG1JacExtSubMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + 
c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleNegMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG1JacExtAdd(b *testing.B) { + var a, c g1JacExtended + a.doubleMixed(&g1GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG1JacExtDouble(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG1Affine(p *G1Jac, f fp.Element) G1Jac { + var res G1Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG1Affine(p *g1JacExtended, f fp.Element) g1JacExtended { + var res g1JacExtended + var ff, fff fp.Element + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bw6-756/g2.go b/ecc/bw6-756/g2.go new file mode 100644 index 000000000..934f7d6f1 --- /dev/null +++ b/ecc/bw6-756/g2.go @@ -0,0 +1,933 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G2Affine point in affine coordinates +type G2Affine struct { + X, Y fp.Element +} + +// G2Jac is a point with fp.Element coordinates +type G2Jac struct { + X, Y, Z fp.Element +} + +// g2JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g2JacExtended struct { + X, Y, ZZ, ZZZ fp.Element +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G2Affine) Set(a *G2Affine) *G2Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G2Affine) ScalarMultiplication(a *G2Affine, s *big.Int) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Add(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Sub(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G2Affine) Equal(a *G2Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G2Affine) Neg(a *G2Affine) *G2Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G2Affine) FromJacobian(p1 *G2Jac) *G2Affine { + + var a, b fp.Element + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G2Affine) String() string { + var x, y fp.Element + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G2Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Affine) IsOnCurve() bool { + var point G2Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G2Affine) IsInSubGroup() bool { + var _p G2Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G2Jac) Set(a *G2Jac) *G2Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G2Jac) Equal(a *G2Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G2Affine{} + _p.FromJacobian(p) + + _a := G2Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G2Jac) Neg(a *G2Jac) *G2Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G2Jac) SubAssign(a *G2Jac) *G2Jac { + var tmp G2Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G2Jac) AddAssign(a *G2Jac) *G2Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fp.Element + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G2Jac) AddMixed(a *G2Affine) *G2Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fp.Element + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) Double(q *G2Jac) *G2Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) DoubleAssign() *G2Jac { + + var XX, YY, YYYY, ZZ, S, M, T fp.Element + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) ScalarMultiplication(a *G2Jac, s *big.Int) *G2Jac { + return p.mulGLV(a, s) +} + +func (p *G2Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G2Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G2Jac) FromAffine(Q *G2Affine) *G2Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Jac) IsOnCurve() bool { + var left, right, tmp fp.Element + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bTwistCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// IsInSubGroup returns true if p is on the r-torsion, false otherwise. +// Z[r,0]+Z[-lambdaG2Affine, 1] is the kernel +// of (u,v)->u+lambdaG2Affinev mod r. Expressing r, lambdaG2Affine as +// polynomials in x, a short vector of this Zmodule is +// (x+1), (x**3-x**2+1). So we check that (x+1)p+(x**3-x**2+1)*phi(p) +// is the infinity. +func (p *G2Jac) IsInSubGroup() bool { + + var res, phip G2Jac + phip.phi(p) + res.ScalarMultiplication(&phip, &xGen). + SubAssign(&phip). + ScalarMultiplication(&res, &xGen). + ScalarMultiplication(&res, &xGen). 
+ AddAssign(&phip) + + phip.ScalarMultiplication(p, &xGen).AddAssign(p).AddAssign(&res) + + return phip.IsOnCurve() && phip.Z.IsZero() + +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G2Jac) mulWindowed(a *G2Jac, s *big.Int) *G2Jac { + + var res G2Jac + var ops [3]G2Jac + + res.Set(&g2Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G2Jac) phi(a *G2Jac) *G2Jac { + p.Set(a) + p.X.Mul(&p.X, &thirdRootOneG2) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { + + var table [15]G2Jac + var res G2Jac + var k1, k2 fr.Element + + res.Set(&g2Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + 
table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Affine) ClearCofactor(a *G2Affine) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { + + var L0, L1, uP, u2P, u3P, tmp G2Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + // ht=-2, hy=0 + // d1=1, d2=-1, d3=-1 + + L0.Set(a). + AddAssign(&u2P). + SubAssign(&uP) + tmp.Set(&u2P). + AddAssign(a). + SubAssign(&uP). + Double(&tmp) + L1.Set(&u3P). + SubAssign(&tmp) + + p.phi(&L0). 
+ AddAssign(&L1) + + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g2JacExtended) Set(a *g2JacExtended) *g2JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g2JacExtended) setInfinity() *g2JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G2Affine) fromJacExtended(Q *g2JacExtended) *G2Affine { + if Q.ZZ.IsZero() { + p.X = fp.Element{} + p.Y = fp.Element{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G2Jac) fromJacExtended(Q *g2JacExtended) *G2Jac { + if Q.ZZ.IsZero() { + p.Set(&g2Infinity) + return p + } + p.X.Mul(&Q.ZZ, &Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G2Jac) unsafeFromJacExtended(Q *g2JacExtended) *G2Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g2JacExtended) add(q *g2JacExtended) *g2JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fp.Element + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if B.IsZero() { + return p.double(q) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var U1, U2, 
S1, S2, P, R, PP, PPP, Q, V fp.Element + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) double(q *g2JacExtended) *g2JacExtended { + var U, V, W, S, XX, M fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) subMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + 
return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) addMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g2JacExtended) doubleNegMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) doubleMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// BatchScalarMultiplicationG2 multiplies the same base (generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG2(base *G2Affine, scalars []fr.Element) []G2Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G2Jac, (1 << (c - 1))) + baseTable[0].Set(&g2Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + toReturn := 
make([]G2Affine, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. + parallel.Execute(len(pScalars), func(start, end int) { + var p G2Jac + for i := start; i < end; i++ { + p.Set(&g2Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddAssign(&baseTable[bits-1]) + } else { + // sub + t := baseTable[bits & ^msbWindow] + t.Neg(&t) + p.AddAssign(&t) + } + } + + // set our result point + toReturn[i].FromJacobian(&p) + + } + }) + return toReturn +} diff --git a/ecc/bw6-756/g2_test.go b/ecc/bw6-756/g2_test.go new file mode 100644 index 000000000..c2fe39936 --- /dev/null +++ b/ecc/bw6-756/g2_test.go @@ -0,0 +1,664 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG2AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fp.Element) bool { + var p, res1, res2 G2Jac + p = fuzzJacobianG2Affine(&g2Gen, a) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenFp(), + )) + + properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fp.Element) bool { + var p, res, tmp G2Jac + p = fuzzJacobianG2Affine(&g2Gen, a) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Svsw mapping should output point on the curve", prop.ForAll( + func(a fp.Element) bool { + g := MapToCurveG2Svdw(a) + return g.IsInSubGroup() + }, + GenFp(), + )) + + properties.Property("[G2] Svsw mapping should be deterministic", prop.ForAll( + func(a fp.Element) bool { + g1 := MapToCurveG2Svdw(a) + g2 := MapToCurveG2Svdw(a) + return g1.Equal(&g2) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] g2Gen (affine) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G2Affine + op1.FromJacobian(&g2Gen) + op2.FromJacobian(&g2Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenFp(), + )) + + properties.Property("[BW6-756] g2Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2, op3 G2Jac + op1.Set(&g2Gen) + op3.Set(&g2Gen) + + op2 = fuzzJacobianG2Affine(&g2Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + g := 
fuzzJacobianG2Affine(&g2Gen, a) + var op1 G2Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + var g g2JacExtended + g.X.Set(&g2Gen.X) + g.Y.Set(&g2Gen.Y) + g.ZZ.Set(&g2Gen.Z) + g.ZZZ.Set(&g2Gen.Z) + gfuzz := fuzzExtendedJacobianG2Affine(&g, a) + + var op1 G2Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fp.Element) bool { + var g G2Jac + var op1 G2Affine + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + + var one fp.Element + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g2Gen.X) && g.Y.Equal(&g2Gen.Y) && g.Z.Equal(&one) + }, + GenFp(), + )) + + properties.Property("[BW6-756] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G2Jac + op1.FromAffine(&g) + var one, zero fp.Element + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G2Affine + var op1 g2JacExtended + var zero fp.Element + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Jac + var op1 g2JacExtended + var zero, one fp.Element + one.SetOne() + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return 
g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BW6-756] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fp.Element) bool { + op1 := fuzzJacobianG2Affine(&g2Gen, a) + op2 := fuzzJacobianG2Affine(&g2Gen, b) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BW6-756] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop2 := fuzzJacobianG2Affine(&g2Gen, b) + var op1, op2 G2Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop2 := fuzzJacobianG2Affine(&g2Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g2Infinity) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop1.AddAssign(&g2Infinity) + var op2 G2Jac + op2.Set(&g2Infinity) + op2.AddAssign(&g2Gen) + return fop1.Equal(&g2Gen) && op2.Equal(&g2Gen) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + var p1, p1Neg G2Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g2JacExtended + o1.addMixed(&p1Neg) + 
o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + var p1, p1Neg G2Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g2JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BW6-756] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop1.Neg(&fop1) + var op2 G2Affine + op2.FromJacobian(&g2Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g2Infinity) + }, + GenFp(), + )) + + properties.Property("[BW6-756] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G2Jac + g.mulGLV(&g2Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G2Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g2Gen, &rminusone) + gneg.Neg(&g2Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g2Gen, &scalar) + op2.mulWindowed(&g2Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G2Jac + g.mulGLV(&g2Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G2Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.ScalarMultiplication(&g2Gen, &rminusone) + 
gneg.Neg(&g2Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g2Gen, &scalar) + op2.ScalarMultiplication(&g2Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BW6-756] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G2Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g2Gen, &r) + op2.mulGLV(&g2Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g2Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BW6-756] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fp.Element + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff) + + for x.Legendre() != 1 { + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff) + + } + + b.Sqrt(&x) + var point, pointCleared, infinity G2Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g2Infinity) + return point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG2AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BW6-756] BatchScalarMultiplication should be consistant with individual scalar multiplications", prop.ForAll( + 
func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G2Jac + var expected G2Affine + var b big.Int + expectedJac.mulGLV(&g2Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG2JacIsInSubGroup(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG2AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG2JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G2Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g2Gen, &scalar) + } + }) + + var glv G2Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g2Gen, &scalar) + } + }) + +} + +func BenchmarkG2AffineCofactorClearing(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG2JacAdd(b *testing.B) { + var a G2Jac + a.Double(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g2Gen) + } +} + +func BenchmarkG2JacAddMixed(b *testing.B) { + var a G2Jac + a.Double(&g2Gen) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG2JacDouble(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG2JacExtAddMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG2JacExtSubMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG2JacExtDoubleMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + 
c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG2JacExtDoubleNegMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG2JacExtAdd(b *testing.B) { + var a, c g2JacExtended + a.doubleMixed(&g2GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG2JacExtDouble(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG2Affine(p *G2Jac, f fp.Element) G2Jac { + var res G2Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG2Affine(p *g2JacExtended, f fp.Element) g2JacExtended { + var res g2JacExtended + var ff, fff fp.Element + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bw6-756/hash_to_curve.go b/ecc/bw6-756/hash_to_curve.go new file mode 100644 index 000000000..f98291369 --- /dev/null +++ b/ecc/bw6-756/hash_to_curve.go @@ -0,0 +1,262 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bw6756 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" +) + +// hashToFp hashes msg to count prime field elements. +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 +func hashToFp(msg, dst []byte, count int) ([]fp.Element, error) { + + // 128 bits of security + // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 + L := 64 + + lenInBytes := count * L + pseudoRandomBytes, err := ecc.ExpandMsgXmd(msg, dst, lenInBytes) + if err != nil { + return nil, err + } + + res := make([]fp.Element, count) + for i := 0; i < count; i++ { + res[i].SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) + } + return res, nil +} + +// returns false if u>-u when seen as a bigInt +func sign0(u fp.Element) bool { + var a, b big.Int + u.ToBigIntRegular(&a) + u.Neg(&u) + u.ToBigIntRegular(&b) + return a.Cmp(&b) <= 0 +} + +// ---------------------------------------------------------------------------------------- +// G1Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG1(u fp.Element) G1Affine { + + var res G1Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fp.Element + z.SetOne() + c1.SetUint64(2) + c2.SetString("183162695478688143295363277863609973912688910644623094139398704891720872678025228163994673580388732642095427562821043430262853248964259401622004374680181856276883253377613672296702199391943428932630544113135668167634206718951424") + c3.SetString("44724918635541747613641388754122468373124590147190123301097028599548226344531294896393070481114654454458200984945494713184328466922237226071123682508438597467447523336749298101207214456752293854953034448110052217888788253840855") + 
c4.SetString("244216927304917524393817703818146631883585214192830792185864939855627830237366970885326231440518310189460570083761391240350470998619012535496005832906909141702511004503484896395602932522591238576840725484180890890178942291935230") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y fp.Element + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + tv4.Mul(&tv4, &c3) + x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bCurveCoeff) + E2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if E2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u) && sign0(y) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG1Svdw(t fp.Element) G1Affine { + res := svdwMapG1(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + t, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + res = MapToCurveG1Svdw(t[0]) + return res, nil +} + +// HashToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de 
Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + u, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + Q0 := MapToCurveG1Svdw(u[0]) + Q1 := MapToCurveG1Svdw(u[1]) + var _Q0, _Q1, _res G1Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} + +// ---------------------------------------------------------------------------------------- +// G2Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG2(u fp.Element) G2Affine { + + var res G2Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fp.Element + z.SetOne() + c1.SetString("34") + c2.SetString("183162695478688143295363277863609973912688910644623094139398704891720872678025228163994673580388732642095427562821043430262853248964259401622004374680181856276883253377613672296702199391943428932630544113135668167634206718951424") + c3.SetString("146433077860977604767773731846846066128329453951205949309133381060660557769470436688115507478076238051302460346380778731189432948203027606405253627801253330472118089084289142874549020683770046250396163515340816807916474328196956") + c4.SetString("122108463652458762196908851909073315941792607096415396092932469927813915118683485442663115720259155094730285041880695620175235499309506267748002916453454570851255502251742448197801466261295619288420362742090445445089471145967571") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y fp.Element + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + tv4.Mul(&tv4, &c3) + 
x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bTwistCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bTwistCurveCoeff) + E2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if E2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bTwistCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u) && sign0(y) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG2Svdw(t fp.Element) G2Affine { + res := svdwMapG2(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + t, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + res = MapToCurveG2Svdw(t[0]) + return res, nil +} + +// HashToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + u, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + Q0 := MapToCurveG2Svdw(u[0]) + Q1 := MapToCurveG2Svdw(u[1]) + var _Q0, _Q1, _res G2Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} diff --git a/ecc/bw6-756/internal/fptower/e3.go 
b/ecc/bw6-756/internal/fptower/e3.go new file mode 100644 index 000000000..81bf3b878 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e3.go @@ -0,0 +1,299 @@ +// Copyright 2020 ConsenSys AG +// +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fptower + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" +) + +// E3 is a degree-three finite field extension of fp3 +type E3 struct { + A0, A1, A2 fp.Element +} + +// Equal returns true if z equals x, fasle otherwise +// note this is more efficient than calling "z == x" +func (z *E3) Equal(x *E3) bool { + return z.A0.Equal(&x.A0) && z.A1.Equal(&x.A1) && z.A2.Equal(&x.A2) +} + +// SetString sets a E3 elmt from string +func (z *E3) SetString(s1, s2, s3 string) *E3 { + z.A0.SetString(s1) + z.A1.SetString(s2) + z.A2.SetString(s3) + return z +} + +// SetZero sets an E3 elmt to zero +func (z *E3) SetZero() *E3 { + *z = E3{} + return z +} + +// Clone returns a copy of self +func (z *E3) Clone() *E3 { + return &E3{ + A0: z.A0, + A1: z.A1, + A2: z.A2, + } +} + +// Set Sets a E3 elmt form another E3 elmt +func (z *E3) Set(x *E3) *E3 { + *z = *x + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E3) SetOne() *E3 { + z.A0.SetOne() + z.A1.SetZero() + z.A2.SetZero() + return z +} + +// SetRandom set z to a random elmt +func (z *E3) SetRandom() (*E3, error) { + if _, err := z.A0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.A1.SetRandom(); err != nil { + return nil, err + } + if _, err := z.A2.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// IsZero returns true if the two 
elements are equal, fasle otherwise +func (z *E3) IsZero() bool { + return z.A0.IsZero() && z.A1.IsZero() && z.A2.IsZero() +} + +// Neg negates the E3 number +func (z *E3) Neg(x *E3) *E3 { + z.A0.Neg(&x.A0) + z.A1.Neg(&x.A1) + z.A2.Neg(&x.A2) + return z +} + +// ToMont converts to Mont form +func (z *E3) ToMont() *E3 { + z.A0.ToMont() + z.A1.ToMont() + z.A2.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E3) FromMont() *E3 { + z.A0.FromMont() + z.A1.FromMont() + z.A2.FromMont() + return z +} + +// Add adds two elements of E3 +func (z *E3) Add(x, y *E3) *E3 { + z.A0.Add(&x.A0, &y.A0) + z.A1.Add(&x.A1, &y.A1) + z.A2.Add(&x.A2, &y.A2) + return z +} + +// Sub two elements of E3 +func (z *E3) Sub(x, y *E3) *E3 { + z.A0.Sub(&x.A0, &y.A0) + z.A1.Sub(&x.A1, &y.A1) + z.A2.Sub(&x.A2, &y.A2) + return z +} + +// Double doubles an element in E3 +func (z *E3) Double(x *E3) *E3 { + z.A0.Double(&x.A0) + z.A1.Double(&x.A1) + z.A2.Double(&x.A2) + return z +} + +// String puts E3 elmt in string form +func (z *E3) String() string { + return (z.A0.String() + "+(" + z.A1.String() + ")*u+(" + z.A2.String() + ")*u**2") +} + +// Conjugate conjugates an element in E3 +func (z *E3) Conjugate(x *E3) *E3 { + *z = *x + z.A1.Neg(&z.A1) + return z +} + +// MulByElement multiplies an element in E3 by an element in fp +func (z *E3) MulByElement(x *E3, y *fp.Element) *E3 { + _y := *y + z.A0.Mul(&x.A0, &_y) + z.A1.Mul(&x.A1, &_y) + z.A2.Mul(&x.A2, &_y) + return z +} + +// MulBy01 multiplication by sparse element (c0,c1,0) +func (z *E3) MulBy01(c0, c1 *fp.Element) *E3 { + + var a, b, tmp, t0, t1, t2 fp.Element + + a.Mul(&z.A0, c0) + b.Mul(&z.A1, c1) + + tmp.Add(&z.A1, &z.A2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + t0.Add(&t0, &a) + + tmp.Add(&z.A0, &z.A2) + t2.Mul(c0, &tmp) + t2.Sub(&t2, &a) + t2.Add(&t2, &b) + + t1.Add(c0, c1) + tmp.Add(&z.A0, &z.A1) + t1.Mul(&t1, &tmp) + t1.Sub(&t1, &a) + t1.Sub(&t1, &b) + + z.A0.Set(&t0) + z.A1.Set(&t1) + 
z.A2.Set(&t2) + + return z +} + +// MulBy1 multiplication of E6 by sparse element (0, c1, 0) +func (z *E3) MulBy1(c1 *fp.Element) *E3 { + + var b, tmp, t0, t1 fp.Element + b.Mul(&z.A1, c1) + + tmp.Add(&z.A1, &z.A2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + + tmp.Add(&z.A0, &z.A1) + t1.Mul(c1, &tmp) + t1.Sub(&t1, &b) + + z.A0.Set(&t0) + z.A1.Set(&t1) + z.A2.Set(&b) + + return z +} + +// Mul sets z to the E3-product of x,y, returns z +func (z *E3) Mul(x, y *E3) *E3 { + // Algorithm 13 from https://eprint.iacr.org/2010/354.pdf + var t0, t1, t2, c0, c1, c2, tmp fp.Element + t0.Mul(&x.A0, &y.A0) + t1.Mul(&x.A1, &y.A1) + t2.Mul(&x.A2, &y.A2) + + c0.Add(&x.A1, &x.A2) + tmp.Add(&y.A1, &y.A2) + c0.Mul(&c0, &tmp).Sub(&c0, &t1).Sub(&c0, &t2).MulByNonResidue(&c0) + + tmp.Add(&x.A0, &x.A2) + c2.Add(&y.A0, &y.A2).Mul(&c2, &tmp).Sub(&c2, &t0).Sub(&c2, &t2) + + c1.Add(&x.A0, &x.A1) + tmp.Add(&y.A0, &y.A1) + c1.Mul(&c1, &tmp).Sub(&c1, &t0).Sub(&c1, &t1) + t2.MulByNonResidue(&t2) + + z.A0.Add(&c0, &t0) + z.A1.Add(&c1, &t2) + z.A2.Add(&c2, &t1) + + return z +} + +// MulAssign sets z to the E3-product of z,y, returns z +func (z *E3) MulAssign(x *E3) *E3 { + return z.Mul(z, x) +} + +// Square sets z to the E3-product of x,x, returns z +func (z *E3) Square(x *E3) *E3 { + + // Algorithm 16 from https://eprint.iacr.org/2010/354.pdf + var c4, c5, c1, c2, c3, c0, c6 fp.Element + + c6.Double(&x.A1) + c4.Mul(&x.A0, &c6) // x.A0 * xA1 * 2 + c5.Square(&x.A2) + c1.MulByNonResidue(&c5).Add(&c1, &c4) + c2.Sub(&c4, &c5) + + c3.Square(&x.A0) + c4.Sub(&x.A0, &x.A1).Add(&c4, &x.A2) + c5.Mul(&c6, &x.A2) // x.A1 * xA2 * 2 + c4.Square(&c4) + c0.MulByNonResidue(&c5) + c4.Add(&c4, &c5).Sub(&c4, &c3) + + z.A0.Add(&c0, &c3) + z.A1 = c1 + z.A2.Add(&c2, &c4) + + return z +} + +// MulByNonResidue mul x by (0,1,0) +func (z *E3) MulByNonResidue(x *E3) *E3 { + z.A2, z.A1, z.A0 = x.A1, x.A0, x.A2 + z.A0.MulByNonResidue(&z.A0) + return z +} + +// Inverse an element in E3 +func (z *E3) 
Inverse(x *E3) *E3 { + // Algorithm 17 from https://eprint.iacr.org/2010/354.pdf + // step 9 is wrong in the paper it's t1-t4 + var t0, t1, t2, t3, t4, t5, t6, c0, c1, c2, d1, d2 fp.Element + t0.Square(&x.A0) + t1.Square(&x.A1) + t2.Square(&x.A2) + t3.Mul(&x.A0, &x.A1) + t4.Mul(&x.A0, &x.A2) + t5.Mul(&x.A1, &x.A2) + c0.MulByNonResidue(&t5).Neg(&c0).Add(&c0, &t0) + c1.MulByNonResidue(&t2).Sub(&c1, &t3) + c2.Sub(&t1, &t4) + t6.Mul(&x.A0, &c0) + d1.Mul(&x.A2, &c1) + d2.Mul(&x.A1, &c2) + d1.Add(&d1, &d2).MulByNonResidue(&d1) + t6.Add(&t6, &d1) + t6.Inverse(&t6) + z.A0.Mul(&c0, &t6) + z.A1.Mul(&c1, &t6) + z.A2.Mul(&c2, &t6) + + return z +} diff --git a/ecc/bw6-756/internal/fptower/e3_test.go b/ecc/bw6-756/internal/fptower/e3_test.go new file mode 100644 index 000000000..87e783576 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e3_test.go @@ -0,0 +1,330 @@ +package fptower + +import ( + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE3ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE3() + genB := GenE3() + genfp := GenFp() + + properties.Property("[BW756] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW756] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW756] Having the receiver as operand (mul) should 
output the same result", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW756] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (neg) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Neg(a) + a.Neg(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (mul by non residue) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.MulByNonResidue(a) + a.MulByNonResidue(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Having the receiver as operand (mul by element) should output the same result", prop.ForAll( + func(a *E3, b fp.Element) bool { + var c E3 + c.MulByElement(a, &b) + a.MulByElement(a, &b) + return a.Equal(&c) + }, + genA, + genfp, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE3Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + 
properties := gopter.NewProperties(parameters) + + genA := GenE3() + genB := GenE3() + genfp := GenFp() + + properties.Property("[BW756] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E3) bool { + var c E3 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW756] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E3) bool { + var c, d E3 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW756] inverse twice should leave an element invariant", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] neg twice should leave an element invariant", prop.ForAll( + func(a *E3) bool { + var b E3 + b.Neg(a).Neg(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] square and mul should output the same result", prop.ForAll( + func(a *E3) bool { + var b, c E3 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BW756] MulByElement MulByElement inverse should leave an element invariant", prop.ForAll( + func(a *E3, b fp.Element) bool { + var c E3 + var d fp.Element + d.Inverse(&b) + c.MulByElement(a, &b).MulByElement(&c, &d) + return c.Equal(a) + }, + genA, + genfp, + )) + + properties.Property("[BW756] Double and mul by 2 should output the same result", prop.ForAll( + func(a *E3) bool { + var b E3 + var c fp.Element + c.SetUint64(2) + b.Double(a) + a.MulByElement(a, &c) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW756] Mulbynonres should be the same as multiplying by (0,1)", prop.ForAll( + func(a *E3) bool { + var b, c, d E3 + b.A1.SetOne() + c.MulByNonResidue(a) + d.Mul(a, &b) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW756] a + pi(a), a-pi(a) should be real", prop.ForAll( + func(a *E3) bool { + var b, c, 
d E3 + var e, f fp.Element + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.A0) + f.Double(&a.A1) + return c.A1.IsZero() && d.A0.IsZero() && e.Equal(&c.A0) && f.Equal(&d.A1) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE3Add(b *testing.B) { + var a, c E3 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE3Sub(b *testing.B) { + var a, c E3 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE3Mul(b *testing.B) { + var a, c E3 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE3MulByElement(b *testing.B) { + var a E3 + var c fp.Element + c.SetRandom() + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByElement(&a, &c) + } +} + +func BenchmarkE3Square(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE3Inverse(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } +} + +func BenchmarkE3MulNonRes(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByNonResidue(&a) + } +} + +func BenchmarkE3Conjugate(b *testing.B) { + var a E3 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Conjugate(&a) + } +} diff --git a/ecc/bw6-756/internal/fptower/e6.go b/ecc/bw6-756/internal/fptower/e6.go new file mode 100644 index 000000000..7a794fb0c --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e6.go @@ -0,0 +1,412 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "errors" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// E6 is a degree two finite field extension of fp3 +type E6 struct { + B0, B1 E3 +} + +// Equal returns true if z equals x, fasle otherwise +func (z *E6) Equal(x *E6) bool { + return z.B0.Equal(&x.B0) && z.B1.Equal(&x.B1) +} + +// String puts E6 in string form +func (z *E6) String() string { + return (z.B0.String() + "+(" + z.B1.String() + ")*v") +} + +// SetString sets a E6 from string +func (z *E6) SetString(s0, s1, s2, s3, s4, s5 string) *E6 { + z.B0.SetString(s0, s1, s2) + z.B1.SetString(s3, s4, s5) + return z +} + +// Set copies x into z and returns z +func (z *E6) Set(x *E6) *E6 { + *z = *x + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E6) SetOne() *E6 { + *z = E6{} + z.B0.A0.SetOne() + return z +} + +// ToMont converts to Mont form +func (z *E6) ToMont() *E6 { + z.B0.ToMont() + z.B1.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E6) FromMont() *E6 { + z.B0.FromMont() + z.B1.FromMont() + return z +} + +// Add set z=x+y in E6 and return z +func (z *E6) Add(x, y *E6) *E6 { + z.B0.Add(&x.B0, &y.B0) + z.B1.Add(&x.B1, &y.B1) + return z +} + +// Sub sets z to x sub y and return z +func (z *E6) Sub(x, y *E6) *E6 { + z.B0.Sub(&x.B0, &y.B0) + z.B1.Sub(&x.B1, &y.B1) + return z +} + +// Double sets z=2*x and returns z +func (z *E6) Double(x *E6) *E6 { + 
z.B0.Double(&x.B0) + z.B1.Double(&x.B1) + return z +} + +// SetRandom used only in tests +func (z *E6) SetRandom() (*E6, error) { + if _, err := z.B0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.B1.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// Mul set z=x*y in E6 and return z +func (z *E6) Mul(x, y *E6) *E6 { + var a, b, c E3 + a.Add(&x.B0, &x.B1) + b.Add(&y.B0, &y.B1) + a.Mul(&a, &b) + b.Mul(&x.B0, &y.B0) + c.Mul(&x.B1, &y.B1) + z.B1.Sub(&a, &b).Sub(&z.B1, &c) + z.B0.MulByNonResidue(&c).Add(&z.B0, &b) + return z +} + +// Square set z=x*x in E6 and return z +func (z *E6) Square(x *E6) *E6 { + + //Algorithm 22 from https://eprint.iacr.org/2010/354.pdf + var c0, c2, c3 E3 + c0.Sub(&x.B0, &x.B1) + c3.MulByNonResidue(&x.B1).Neg(&c3).Add(&x.B0, &c3) + c2.Mul(&x.B0, &x.B1) + c0.Mul(&c0, &c3).Add(&c0, &c2) + z.B1.Double(&c2) + c2.MulByNonResidue(&c2) + z.B0.Add(&c0, &c2) + + return z +} + +// Karabina's compressed cyclotomic square +// https://eprint.iacr.org/2010/542.pdf +// Th. 3.2 with minor modifications to fit our tower +func (z *E6) CyclotomicSquareCompressed(x *E6) *E6 { + + var t [7]fp.Element + + // t0 = g1^2 + t[0].Square(&x.B0.A1) + // t1 = g5^2 + t[1].Square(&x.B1.A2) + // t5 = g1 + g5 + t[5].Add(&x.B0.A1, &x.B1.A2) + // t2 = (g1 + g5)^2 + t[2].Square(&t[5]) + + // t3 = g1^2 + g5^2 + t[3].Add(&t[0], &t[1]) + // t5 = 2 * g1 * g5 + t[5].Sub(&t[2], &t[3]) + + // t6 = g3 + g2 + t[6].Add(&x.B1.A0, &x.B0.A2) + // t3 = (g3 + g2)^2 + t[3].Square(&t[6]) + // t2 = g3^2 + t[2].Square(&x.B1.A0) + + // t6 = 2 * nr * g1 * g5 + t[6].MulByNonResidue(&t[5]) + // t5 = 4 * nr * g1 * g5 + 2 * g3 + t[5].Add(&t[6], &x.B1.A0). 
+ Double(&t[5]) + // z3 = 6 * nr * g1 * g5 + 2 * g3 + z.B1.A0.Add(&t[5], &t[6]) + + // t4 = nr * g5^2 + t[4].MulByNonResidue(&t[1]) + // t5 = nr * g5^2 + g1^2 + t[5].Add(&t[0], &t[4]) + // t6 = nr * g5^2 + g1^2 - g2 + t[6].Sub(&t[5], &x.B0.A2) + + // t1 = g2^2 + t[1].Square(&x.B0.A2) + + // t6 = 2 * nr * g5^2 + 2 * g1^2 - 2*g2 + t[6].Double(&t[6]) + // z2 = 3 * nr * g5^2 + 3 * g1^2 - 2*g2 + z.B0.A2.Add(&t[6], &t[5]) + + // t4 = nr * g2^2 + t[4].MulByNonResidue(&t[1]) + // t5 = g3^2 + nr * g2^2 + t[5].Add(&t[2], &t[4]) + // t6 = g3^2 + nr * g2^2 - g1 + t[6].Sub(&t[5], &x.B0.A1) + // t6 = 2 * g3^2 + 2 * nr * g2^2 - 2 * g1 + t[6].Double(&t[6]) + // z1 = 3 * g3^2 + 3 * nr * g2^2 - 2 * g1 + z.B0.A1.Add(&t[6], &t[5]) + + // t0 = g2^2 + g3^2 + t[0].Add(&t[2], &t[1]) + // t5 = 2 * g3 * g2 + t[5].Sub(&t[3], &t[0]) + // t6 = 2 * g3 * g2 + g5 + t[6].Add(&t[5], &x.B1.A2) + // t6 = 4 * g3 * g2 + 2 * g5 + t[6].Double(&t[6]) + // z5 = 6 * g3 * g2 + 2 * g5 + z.B1.A2.Add(&t[5], &t[6]) + + return z +} + +// Decompress Karabina's cyclotomic square result +func (z *E6) Decompress(x *E6) *E6 { + + var t [3]fp.Element + var one fp.Element + one.SetOne() + + // t0 = g1^2 + t[0].Square(&x.B0.A1) + // t1 = 3 * g1^2 - 2 * g2 + t[1].Sub(&t[0], &x.B0.A2). + Double(&t[1]). + Add(&t[1], &t[0]) + // t0 = E * g5^2 + t1 + t[2].Square(&x.B1.A2) + t[0].MulByNonResidue(&t[2]). + Add(&t[0], &t[1]) + // t1 = 1/(4 * g3) + t[1].Double(&x.B1.A0). + Double(&t[1]). + Inverse(&t[1]) // costly + // z4 = g4 + z.B1.A1.Mul(&t[0], &t[1]) + + // t1 = g2 * g1 + t[1].Mul(&x.B0.A2, &x.B0.A1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t[2].Square(&x.B1.A1). + Sub(&t[2], &t[1]). + Double(&t[2]). + Sub(&t[2], &t[1]) + // t1 = g3 * g5 + t[1].Mul(&x.B1.A0, &x.B1.A2) + // c_0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t[2].Add(&t[2], &t[1]) + z.B0.A0.MulByNonResidue(&t[2]). 
+ Add(&z.B0.A0, &one) + + z.B0.A1.Set(&x.B0.A1) + z.B0.A2.Set(&x.B0.A2) + z.B1.A0.Set(&x.B1.A0) + z.B1.A2.Set(&x.B1.A2) + + return z +} + +// Granger-Scott's cyclotomic square +// https://eprint.iacr.org/2009/565.pdf, 3.2 +func (z *E6) CyclotomicSquare(x *E6) *E6 { + // x=(x0,x1,x2,x3,x4,x5,x6,x7) in E3^6 + // cyclosquare(x)=(3*x4^2*u + 3*x0^2 - 2*x0, + // 3*x2^2*u + 3*x3^2 - 2*x1, + // 3*x5^2*u + 3*x1^2 - 2*x2, + // 6*x1*x5*u + 2*x3, + // 6*x0*x4 + 2*x4, + // 6*x2*x3 + 2*x5) + + var t [9]fp.Element + + t[0].Square(&x.B1.A1) + t[1].Square(&x.B0.A0) + t[6].Add(&x.B1.A1, &x.B0.A0).Square(&t[6]).Sub(&t[6], &t[0]).Sub(&t[6], &t[1]) // 2*x4*x0 + t[2].Square(&x.B0.A2) + t[3].Square(&x.B1.A0) + t[7].Add(&x.B0.A2, &x.B1.A0).Square(&t[7]).Sub(&t[7], &t[2]).Sub(&t[7], &t[3]) // 2*x2*x3 + t[4].Square(&x.B1.A2) + t[5].Square(&x.B0.A1) + t[8].Add(&x.B1.A2, &x.B0.A1).Square(&t[8]).Sub(&t[8], &t[4]).Sub(&t[8], &t[5]).MulByNonResidue(&t[8]) // 2*x5*x1*u + + t[0].MulByNonResidue(&t[0]).Add(&t[0], &t[1]) // x4^2*u + x0^2 + t[2].MulByNonResidue(&t[2]).Add(&t[2], &t[3]) // x2^2*u + x3^2 + t[4].MulByNonResidue(&t[4]).Add(&t[4], &t[5]) // x5^2*u + x1^2 + + z.B0.A0.Sub(&t[0], &x.B0.A0).Double(&z.B0.A0).Add(&z.B0.A0, &t[0]) + z.B0.A1.Sub(&t[2], &x.B0.A1).Double(&z.B0.A1).Add(&z.B0.A1, &t[2]) + z.B0.A2.Sub(&t[4], &x.B0.A2).Double(&z.B0.A2).Add(&z.B0.A2, &t[4]) + + z.B1.A0.Add(&t[8], &x.B1.A0).Double(&z.B1.A0).Add(&z.B1.A0, &t[8]) + z.B1.A1.Add(&t[6], &x.B1.A1).Double(&z.B1.A1).Add(&z.B1.A1, &t[6]) + z.B1.A2.Add(&t[7], &x.B1.A2).Double(&z.B1.A2).Add(&z.B1.A2, &t[7]) + + return z +} + +// Inverse set z to the inverse of x in E6 and return z +func (z *E6) Inverse(x *E6) *E6 { + // Algorithm 23 from https://eprint.iacr.org/2010/354.pdf + + var t0, t1, tmp E3 + t0.Square(&x.B0) + t1.Square(&x.B1) + tmp.MulByNonResidue(&t1) + t0.Sub(&t0, &tmp) + t1.Inverse(&t0) + z.B0.Mul(&x.B0, &t1) + z.B1.Mul(&x.B1, &t1).Neg(&z.B1) + + return z +} + +// Exp sets z=x**e and returns it +func (z *E6) Exp(x *E6, e 
big.Int) *E6 { + var res E6 + res.SetOne() + b := e.Bytes() + for i := range b { + w := b[i] + mask := byte(0x80) + for j := 7; j >= 0; j-- { + res.Square(&res) + if (w&mask)>>j != 0 { + res.Mul(&res, x) + } + mask = mask >> 1 + } + } + z.Set(&res) + return z +} + +// InverseUnitary inverse a unitary element +func (z *E6) InverseUnitary(x *E6) *E6 { + return z.Conjugate(x) +} + +// Conjugate set z to x conjugated and return z +func (z *E6) Conjugate(x *E6) *E6 { + *z = *x + z.B1.Neg(&z.B1) + return z +} + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = fp.Bytes * 6 + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... +func (z *E6) Bytes() (r [SizeOfGT]byte) { + + offset := 0 + var buf [fp.Bytes]byte + + buf = z.B1.A2.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B1.A1.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B1.A0.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B0.A2.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B0.A1.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + offset += fp.Bytes + + buf = z.B0.A0.Bytes() + copy(r[offset:offset+fp.Bytes], buf[:]) + + return +} + +// SetBytes interprets e as the bytes of a big-endian GT +// sets z to that value (in Montgomery form), and returns z. +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... 
+func (z *E6) SetBytes(e []byte) error { + if len(e) != SizeOfGT { + return errors.New("invalid buffer size") + } + offset := 0 + z.B1.A2.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B1.A1.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B1.A0.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B0.A2.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B0.A1.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + z.B0.A0.SetBytes(e[offset : offset+fp.Bytes]) + offset += fp.Bytes + + return nil +} + +// IsInSubGroup ensures GT/E6 is in correct sugroup +func (z *E6) IsInSubGroup() bool { + var one, _z E6 + one.SetOne() + _z.Exp(z, *fr.Modulus()) + return _z.Equal(&one) +} diff --git a/ecc/bw6-756/internal/fptower/e6_pairing.go b/ecc/bw6-756/internal/fptower/e6_pairing.go new file mode 100644 index 000000000..c177bfaa7 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e6_pairing.go @@ -0,0 +1,127 @@ +package fptower + +import "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + +func (z *E6) nSquare(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquare(z) + } +} + +func (z *E6) nSquareCompressed(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquareCompressed(z) + } +} + +// Expt set z to x^t in E6 and return z +func (z *E6) Expt(x *E6) *E6 { + + // Expt computation is derived from the addition chain: + // + // _1000 = 1 << 3 + // _1001 = 1 + _1000 + // _1001000 = _1001 << 3 + // _1010001 = _1001 + _1001000 + // _10011001 = _1001000 + _1010001 + // i67 = ((_10011001 << 5 + _1001) << 10 + _1010001) << 41 + // return 1 + i67 + // + // Operations: 62 squares 6 multiplies + // + // Generated by github.com/mmcloughlin/addchain v0.4.0. + + // Allocate Temporaries. 
+ var result, t0, t1 E6 + + // Step 3: result = x^0x8 + result.CyclotomicSquare(x) + result.nSquare(2) + + // Step 4: t0 = x^0x9 + t0.Mul(x, &result) + + // Step 7: t1 = x^0x48 + t1.CyclotomicSquare(&t0) + t1.nSquare(2) + + // Step 8: result = x^0x51 + result.Mul(&t0, &t1) + + // Step 9: t1 = x^0x99 + t1.Mul(&t1, &result) + + // Step 14: t1 = x^0x1320 + t1.nSquare(5) + + // Step 15: t0 = x^0x1329 + t0.Mul(&t0, &t1) + + // Step 25: t0 = x^0x4ca400 + t0.nSquare(10) + + // Step 26: result = x^0x4ca451 + result.Mul(&result, &t0) + + // Step 67: result = x^0x9948a20000000000 + result.nSquareCompressed(41) + result.Decompress(&result) + + // Step 68: result = x^0x9948a20000000001 + z.Mul(x, &result) + + return z +} + +// MulBy034 multiplication by sparse element (c0,0,0,c3,c4,0) +func (z *E6) MulBy034(c0, c3, c4 *fp.Element) *E6 { + + var a, b, d E3 + + a.MulByElement(&z.B0, c0) + + b.Set(&z.B1) + b.MulBy01(c3, c4) + + c0.Add(c0, c3) + d.Add(&z.B0, &z.B1) + d.MulBy01(c0, c4) + + z.B1.Add(&a, &b).Neg(&z.B1).Add(&z.B1, &d) + z.B0.MulByNonResidue(&b).Add(&z.B0, &a) + + return z +} + +// Mul034By034 multiplication of sparse element (c0,0,0,c3,c4,0) by sparse element (d0,0,0,d3,d4,0) +func (z *E6) Mul034By034(d0, d3, d4, c0, c3, c4 *fp.Element) *E6 { + var tmp, x0, x3, x4, x04, x03, x34 fp.Element + x0.Mul(c0, d0) + x3.Mul(c3, d3) + x4.Mul(c4, d4) + tmp.Add(c0, c4) + x04.Add(d0, d4). + Mul(&x04, &tmp). + Sub(&x04, &x0). + Sub(&x04, &x4) + tmp.Add(c0, c3) + x03.Add(d0, d3). + Mul(&x03, &tmp). + Sub(&x03, &x0). + Sub(&x03, &x3) + tmp.Add(c3, c4) + x34.Add(d3, d4). + Mul(&x34, &tmp). + Sub(&x34, &x3). + Sub(&x34, &x4) + + z.B0.A0.MulByNonResidue(&x4). 
+ Add(&z.B0.A0, &x0) + z.B0.A1.Set(&x3) + z.B0.A2.Set(&x34) + z.B1.A0.Set(&x03) + z.B1.A1.Set(&x04) + z.B1.A2.SetZero() + + return z +} diff --git a/ecc/bw6-756/internal/fptower/e6_test.go b/ecc/bw6-756/internal/fptower/e6_test.go new file mode 100644 index 000000000..078d5c7b1 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/e6_test.go @@ -0,0 +1,387 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE6Serialization(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + + properties.Property("[BW6-756] SetBytes(Bytes()) should stay constant", prop.ForAll( + func(a *E6) bool { + var b E6 + buf := a.Bytes() + if err := b.SetBytes(buf[:]); err != nil { + return false + } + return a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE6ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := 
GenE6() + + properties.Property("[BW6-756] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Cyclotomic square) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.CyclotomicSquare(a) + a.CyclotomicSquare(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + 
genA, + )) + + properties.Property("[BW6-756] Having the receiver as operand (Frobenius) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Frobenius(a) + a.Frobenius(a) + return a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE6Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + + properties.Property("[BW6-756] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c E6 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BW6-756] inverse twice should leave an element invariant", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] square and mul should output the same result", prop.ForAll( + func(a *E6) bool { + var b, c E6 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BW6-756] a + pi(a), a-pi(a) should be real", prop.ForAll( + func(a *E6) bool { + var b, c, d E6 + var e, f, g E3 + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.B0) + f.Double(&a.B1) + return c.B1.Equal(&g) && d.B0.Equal(&g) && e.Equal(&c.B0) && f.Equal(&d.B1) + }, + genA, + )) + + properties.Property("[BW6-756] pi**12=id", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Frobenius(a). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). 
+ Frobenius(&b) + return b.Equal(a) + }, + genA, + )) + + properties.Property("[BW6-756] cyclotomic square (Granger-Scott) and square should be the same in the cyclotomic subgroup", prop.ForAll( + func(a *E6) bool { + var b, c, d E6 + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.Frobenius(&b).Mul(a, &b) + c.Square(a) + d.CyclotomicSquare(a) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW6-756] compressed cyclotomic square (Karabina) and square should be the same in the cyclotomic subgroup", prop.ForAll( + func(a *E6) bool { + var b, c, d E6 + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.Frobenius(&b).Mul(a, &b) + c.Square(a) + d.CyclotomicSquareCompressed(a).Decompress(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW6-756] Frobenius of x in E6 should be equal to x^q", prop.ForAll( + func(a *E6) bool { + var b, c E6 + q := fp.Modulus() + b.Frobenius(a) + c.Exp(a, *q) + return c.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE6Add(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE6Sub(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE6Mul(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE6Cyclosquare(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.CyclotomicSquare(&a) + } +} + +func BenchmarkE6Square(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE6Inverse(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } 
+} + +func BenchmarkE6Conjugate(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Conjugate(&a) + } +} + +func BenchmarkE6Frobenius(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Frobenius(&a) + } +} + +func BenchmarkE6Expt(b *testing.B) { + var a, c E6 + a.SetRandom() + b.ResetTimer() + c.Conjugate(&a) + a.Inverse(&a) + c.Mul(&c, &a) + + a.Frobenius(&c). + Mul(&a, &c) + + for i := 0; i < b.N; i++ { + a.Expt(&a) + } +} diff --git a/ecc/bw6-756/internal/fptower/frobenius.go b/ecc/bw6-756/internal/fptower/frobenius.go new file mode 100644 index 000000000..73a7602c6 --- /dev/null +++ b/ecc/bw6-756/internal/fptower/frobenius.go @@ -0,0 +1,102 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fptower + +import "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + +var _frobA = fp.Element{ + 4513305906938863657, + 16223881110415437916, + 2594807996890465129, + 12027263585750947831, + 4394688080420790544, + 16545365607090591069, + 17206939158340345469, + 16693218895653628888, + 12341936222077983834, + 15961798706098381578, + 6325965824540199947, + 854909948470066, +} +var _frobB = fp.Element{ + 13933438166770692198, + 9936849508207988643, + 15731274946730933551, + 17453539207763286666, + 9211229669332609391, + 16304457798847396452, + 9530634072302290725, + 16589137634438497937, + 3757329544587311773, + 6048657743386074056, + 539268601340212626, + 3128351770947469, +} +var _frobC = fp.Element{ + 4513305906938859419, + 12241098542315434076, + 17754824365858099600, + 5821813791745674579, + 7115107423905013045, + 2898523548767316962, + 7403683460125356932, + 16613279480632639560, + 14397298621774850312, + 623298467364696769, + 15794181680107729725, + 1224261424482813, +} +var _frobAC = fp.Element{ + 4239, + 7713986544913874944, + 18326082943621398681, + 11034058719804682881, + 13605917749753399936, + 14403079332228435905, + 8290829156933084579, + 14835612456382575210, + 16099265766665295608, + 3563712375774904018, + 6865234425880412574, + 3983261719417535, +} +var _frobBC = fp.Element{ + 13933438166770687960, + 5954066940107984803, + 12444547241989016406, + 11248089413758013415, + 11931649012816831892, + 2657615740524122345, + 18174122447796853804, + 16509198219417508608, + 5812691944284178251, + 9156901578361940863, + 10007484456907742403, + 3497703246960216, +} + +// Frobenius set z in E6 to Frobenius(x), return z +func (z *E6) Frobenius(x *E6) *E6 { + + z.B0.A0 = x.B0.A0 + z.B0.A1.Mul(&x.B0.A1, &_frobA) + z.B0.A2.Mul(&x.B0.A2, &_frobB) + + z.B1.A0.Mul(&x.B1.A0, &_frobC) + z.B1.A1.Mul(&x.B1.A1, &_frobAC) + z.B1.A2.Mul(&x.B1.A2, &_frobBC) + + return z +} diff --git a/ecc/bw6-756/internal/fptower/generators_test.go 
b/ecc/bw6-756/internal/fptower/generators_test.go new file mode 100644 index 000000000..2c948398f --- /dev/null +++ b/ecc/bw6-756/internal/fptower/generators_test.go @@ -0,0 +1,43 @@ +package fptower + +import ( + "crypto/rand" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/leanovate/gopter" +) + +// TODO all gopter.Gen are incorrect, use same model as goff + +// GenFp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + rand.Read(b[:]) + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenE3 generates an E3 elmt +func GenE3() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + GenFp(), + ).Map(func(values []interface{}) *E3 { + return &E3{A0: values[0].(fp.Element), A1: values[1].(fp.Element), A2: values[2].(fp.Element)} + }) +} + +// E6 generates an E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE3(), + GenE3(), + ).Map(func(values []interface{}) *E6 { + return &E6{B0: *values[0].(*E3), B1: *values[1].(*E3)} + }) +} diff --git a/ecc/bw6-756/marshal.go b/ecc/bw6-756/marshal.go new file mode 100644 index 000000000..9d67b4e82 --- /dev/null +++ b/ecc/bw6-756/marshal.go @@ -0,0 +1,1155 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "encoding/binary" + "errors" + "io" + "reflect" + "sync/atomic" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/internal/fptower" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// To encode G1Affine and G2Affine points, we mask the most significant bits with these bits to specify without ambiguity +// metadata needed for point (de)compression +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. 
+const ( + mMask byte = 0b111 << 5 + mUncompressed byte = 0b000 << 5 + mUncompressedInfinity byte = 0b010 << 5 + mCompressedSmallest byte = 0b100 << 5 + mCompressedLargest byte = 0b101 << 5 + mCompressedInfinity byte = 0b110 << 5 +) + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = fptower.SizeOfGT + +// Encoder writes bw6-756 object values to an output stream +type Encoder struct { + w io.Writer + n int64 // written bytes + raw bool // raw vs compressed encoding +} + +// Decoder reads bw6-756 object values from an inbound stream +type Decoder struct { + r io.Reader + n int64 // read bytes + subGroupCheck bool // default to true +} + +// NewDecoder returns a binary decoder supporting curve bw6-756 objects in both +// compressed and uncompressed (raw) forms +func NewDecoder(r io.Reader, options ...func(*Decoder)) *Decoder { + d := &Decoder{r: r, subGroupCheck: true} + + for _, o := range options { + o(d) + } + + return d +} + +// Decode reads the binary encoding of v from the stream +// type must be *uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, *[]G1Affine or *[]G2Affine +func (dec *Decoder) Decode(v interface{}) (err error) { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() || !rv.Elem().CanSet() { + return errors.New("bw6-756 decoder: unsupported type, need pointer") + } + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // in particular, careful attention must be given to usage of Bytes() method on Elements and Points + // that return an array (not a slice) of bytes. Using this is beneficial to minimize memallocs + // in very large (de)serialization upstream in gnark. 
+ // (but detrimental to code lisibility here) + // TODO double check memory usage and factorize this + + var buf [SizeOfG2AffineUncompressed]byte + var read int + + switch t := v.(type) { + case *fr.Element: + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + t.SetBytes(buf[:fr.Bytes]) + return + case *fp.Element: + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + t.SetBytes(buf[:fp.Bytes]) + return + case *[]fr.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fr.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fr.Bytes]) + } + return + case *[]fp.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fp.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fp.Bytes]) + } + return + case *G1Affine: + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *G2Affine: + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. 
+ read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *[]G1Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G1Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + case *[]G2Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G2Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New("bw6-756 encoder: unsupported type") + } + err = binary.Read(dec.r, binary.BigEndian, t) + if err == nil { + dec.n += int64(n) + } + return + } +} + +// BytesRead return total bytes read from reader +func (dec *Decoder) BytesRead() int64 { + return dec.n +} + +func (dec *Decoder) readUint32() (r uint32, err error) { + var read int + var buf [4]byte + read, err = io.ReadFull(dec.r, buf[:4]) + dec.n += int64(read) + if err != nil { + return + } + r = binary.BigEndian.Uint32(buf[:4]) + return +} + +func isCompressed(msb byte) bool { + mData := msb & mMask + return !((mData == mUncompressed) || (mData == mUncompressedInfinity)) +} + +// NewEncoder returns a binary encoder supporting curve bw6-756 objects +func NewEncoder(w io.Writer, options ...func(*Encoder)) *Encoder { + // default settings + enc := &Encoder{ + w: w, + n: 0, + raw: false, + } + + // handle options + for _, option := range options { + option(enc) + } + + return enc +} + +// Encode writes the binary encoding of v to the stream +// type must be uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, []G1Affine or []G2Affine +func (enc *Encoder) Encode(v interface{}) (err error) { + if enc.raw { + 
return enc.encodeRaw(v) + } + return enc.encode(v) +} + +// BytesWritten return total bytes written on writer +func (enc *Encoder) BytesWritten() int64 { + return enc.n +} + +// RawEncoding returns an option to use in NewEncoder(...) which sets raw encoding mode to true +// points will not be compressed using this option +func RawEncoding() func(*Encoder) { + return func(enc *Encoder) { + enc.raw = true + } +} + +// NoSubgroupChecks returns an option to use in NewDecoder(...) which disable subgroup checks on the points +// the decoder will read. Use with caution, as crafted points from an untrusted source can lead to crypto-attacks. +func NoSubgroupChecks() func(*Decoder) { + return func(dec *Decoder) { + dec.subGroupCheck = false + } +} + +func (enc *Encoder) encode(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < 
len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +func (enc *Encoder) encodeRaw(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = 
t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +// SizeOfG1AffineCompressed represents the size in bytes that a G1Affine need in binary form, compressed +const SizeOfG1AffineCompressed = 96 + +// SizeOfG1AffineUncompressed represents the size in bytes that a G1Affine need in binary form, uncompressed +const SizeOfG1AffineUncompressed = SizeOfG1AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G1Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (p *G1Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes 
returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G1Affine) Bytes() (res [SizeOfG1AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) 
+// see Bytes() for a compressed representation +func (p *G1Affine) RawBytes() (res [SizeOfG1AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + tmp = p.Y + tmp.FromMont() + binary.BigEndian.PutUint64(res[184:192], tmp[0]) + binary.BigEndian.PutUint64(res[176:184], tmp[1]) + binary.BigEndian.PutUint64(res[168:176], tmp[2]) + binary.BigEndian.PutUint64(res[160:168], tmp[3]) + binary.BigEndian.PutUint64(res[152:160], tmp[4]) + binary.BigEndian.PutUint64(res[144:152], tmp[5]) + binary.BigEndian.PutUint64(res[136:144], tmp[6]) + binary.BigEndian.PutUint64(res[128:136], tmp[7]) + binary.BigEndian.PutUint64(res[120:128], tmp[8]) + binary.BigEndian.PutUint64(res[112:120], tmp[9]) + binary.BigEndian.PutUint64(res[104:112], tmp[10]) + binary.BigEndian.PutUint64(res[96:104], tmp[11]) + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short io.ErrShortBuffer is returned +// if buf contains 
compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function retunrs an error +// this check if the resulting point is on the curve and in the correct subgroup +func (p *G1Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G1Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG1AffineCompressed { + return 0, io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG1AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineCompressed, nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + p.X.SetBytes(buf[:fp.Bytes]) + p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. 
we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G1Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is infinity and need 
no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G1Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + // store mData in p.Y[0] + p.Y[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} + +// SizeOfG2AffineCompressed represents the size in bytes that a G2Affine need in binary form, compressed +const SizeOfG2AffineCompressed = 96 + +// SizeOfG2AffineUncompressed represents the size in bytes that a G2Affine need in binary form, uncompressed +const SizeOfG2AffineUncompressed = SizeOfG2AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G2Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (p *G2Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. 
+// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G2Affine) Bytes() (res [SizeOfG2AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) +// see Bytes() for a compressed representation +func (p *G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + tmp = p.Y + tmp.FromMont() + binary.BigEndian.PutUint64(res[184:192], tmp[0]) + 
binary.BigEndian.PutUint64(res[176:184], tmp[1]) + binary.BigEndian.PutUint64(res[168:176], tmp[2]) + binary.BigEndian.PutUint64(res[160:168], tmp[3]) + binary.BigEndian.PutUint64(res[152:160], tmp[4]) + binary.BigEndian.PutUint64(res[144:152], tmp[5]) + binary.BigEndian.PutUint64(res[136:144], tmp[6]) + binary.BigEndian.PutUint64(res[128:136], tmp[7]) + binary.BigEndian.PutUint64(res[120:128], tmp[8]) + binary.BigEndian.PutUint64(res[112:120], tmp[9]) + binary.BigEndian.PutUint64(res[104:112], tmp[10]) + binary.BigEndian.PutUint64(res[96:104], tmp[11]) + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + binary.BigEndian.PutUint64(res[40:48], tmp[6]) + binary.BigEndian.PutUint64(res[32:40], tmp[7]) + binary.BigEndian.PutUint64(res[24:32], tmp[8]) + binary.BigEndian.PutUint64(res[16:24], tmp[9]) + binary.BigEndian.PutUint64(res[8:16], tmp[10]) + binary.BigEndian.PutUint64(res[0:8], tmp[11]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short io.ErrShortBuffer is returned +// if buf contains compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function retunrs an error +// this check if the resulting point is on the curve and in the correct subgroup +func (p *G2Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G2Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG2AffineCompressed { + return 0, 
io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG2AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineCompressed, nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + p.X.SetBytes(buf[:fp.Bytes]) + p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. 
we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G2Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is 
infinity and need no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G2Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + // store mData in p.Y[0] + p.Y[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} diff --git a/ecc/bw6-756/marshal_test.go b/ecc/bw6-756/marshal_test.go new file mode 100644 index 000000000..96540df8e --- /dev/null +++ b/ecc/bw6-756/marshal_test.go @@ -0,0 +1,457 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "bytes" + "io" + "math/big" + "math/rand" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/internal/fptower" +) + +func TestEncoder(t *testing.T) { + + // TODO need proper fuzz testing here + + var inA uint64 + var inB fr.Element + var inC fp.Element + var inD G1Affine + var inE G1Affine + var inF G2Affine + var inG []G1Affine + var inH []G2Affine + var inI []fp.Element + var inJ []fr.Element + + // set values of inputs + inA = rand.Uint64() + inB.SetRandom() + inC.SetRandom() + inD.ScalarMultiplication(&g1GenAff, new(big.Int).SetUint64(rand.Uint64())) + // inE --> infinity + inF.ScalarMultiplication(&g2GenAff, new(big.Int).SetUint64(rand.Uint64())) + inG = make([]G1Affine, 2) + inH = make([]G2Affine, 0) + inG[1] = inD + inI = make([]fp.Element, 3) + inI[2] = inD.X + inJ = make([]fr.Element, 0) + + // encode them, compressed and raw + var buf, bufRaw bytes.Buffer + enc := NewEncoder(&buf) + encRaw := NewEncoder(&bufRaw, RawEncoding()) + toEncode := []interface{}{inA, &inB, &inC, &inD, &inE, &inF, inG, inH, inI, inJ} + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + t.Fatal(err) + } + if err := encRaw.Encode(v); err != nil { + t.Fatal(err) + } + } + + testDecode := func(t *testing.T, r io.Reader, n int64) { + dec := NewDecoder(r) + var outA uint64 + var outB fr.Element + var outC fp.Element + var outD G1Affine + var outE G1Affine + outE.X.SetOne() + outE.Y.SetUint64(42) + var outF G2Affine + var outG []G1Affine + var outH []G2Affine + var outI []fp.Element + var outJ []fr.Element + + toDecode := []interface{}{&outA, &outB, &outC, &outD, &outE, &outF, &outG, &outH, &outI, &outJ} + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + t.Fatal(err) + } 
+ } + + // compare values + if inA != outA { + t.Fatal("didn't encode/decode uint64 value properly") + } + + if !inB.Equal(&outB) || !inC.Equal(&outC) { + t.Fatal("decode(encode(Element) failed") + } + if !inD.Equal(&outD) || !inE.Equal(&outE) { + t.Fatal("decode(encode(G1Affine) failed") + } + if !inF.Equal(&outF) { + t.Fatal("decode(encode(G2Affine) failed") + } + if (len(inG) != len(outG)) || (len(inH) != len(outH)) { + t.Fatal("decode(encode(slice(points))) failed") + } + for i := 0; i < len(inG); i++ { + if !inG[i].Equal(&outG[i]) { + t.Fatal("decode(encode(slice(points))) failed") + } + } + if (len(inI) != len(outI)) || (len(inJ) != len(outJ)) { + t.Fatal("decode(encode(slice(elements))) failed") + } + for i := 0; i < len(inI); i++ { + if !inI[i].Equal(&outI[i]) { + t.Fatal("decode(encode(slice(elements))) failed") + } + } + if n != dec.BytesRead() { + t.Fatal("bytes read don't match bytes written") + } + } + + // decode them + testDecode(t, &buf, enc.BytesWritten()) + testDecode(t, &bufRaw, encRaw.BytesWritten()) + +} + +func TestIsCompressed(t *testing.T) { + var g1Inf, g1 G1Affine + var g2Inf, g2 G2Affine + + g1 = g1GenAff + g2 = g2GenAff + + { + b := g1Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1Inf.Bytes() should be compressed") + } + } + + { + b := g1Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1Inf.RawBytes() should be uncompressed") + } + } + + { + b := g1.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1.Bytes() should be compressed") + } + } + + { + b := g1.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1.RawBytes() should be uncompressed") + } + } + + { + b := g2Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2Inf.Bytes() should be compressed") + } + } + + { + b := g2Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g2Inf.RawBytes() should be uncompressed") + } + } + + { + b := g2.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2.Bytes() should be compressed") + } + } + + { + b := g2.RawBytes() + if isCompressed(b[0]) { + 
t.Fatal("g2.RawBytes() should be uncompressed") + } + } + +} + +func TestG1AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG1AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G1] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != 
SizeOfG1AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G2] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + 
+ buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// define Gopters generators + +// GenFr generates an Fr element +func GenFr() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fr.Element + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenFp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenE3 generates an E3 elmt +func GenE3() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + GenFp(), + ).Map(func(values []interface{}) fptower.E3 { + return fptower.E3{A0: values[0].(fp.Element), A1: values[1].(fp.Element), A2: values[2].(fp.Element)} + }) +} + +// E6 generates an E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE3(), + GenE3(), + ).Map(func(values []interface{}) fptower.E6 { + return fptower.E6{B0: values[0].(fptower.E3), B1: values[1].(fptower.E3)} + }) +} + +// GenBigInt generates a big.Int +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fp.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} diff --git a/ecc/bw6-756/multiexp.go b/ecc/bw6-756/multiexp.go new file mode 100644 index 000000000..dc4306401 --- /dev/null 
+++ b/ecc/bw6-756/multiexp.go @@ -0,0 +1,983 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "errors" + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/internal/parallel" + "math" + "runtime" +) + +// selector stores the index, mask and shifts needed to select bits from a scalar +// it is used during the multiExp algorithm or the batch scalar multiplication +type selector struct { + index uint64 // index in the multi-word scalar to select bits from + mask uint64 // mask (c-bit wide) + shift uint64 // shift needed to get our bits on low positions + + multiWordSelect bool // set to true if we need to select bits from 2 words (case where c doesn't divide 64) + maskHigh uint64 // same than mask, for index+1 + shiftHigh uint64 // same than shift, for index+1 +} + +// partitionScalars compute, for each scalars over c-bit wide windows, nbChunk digits +// if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract +// 2^{c} to the current digit, making it negative. 
+// negative digits can be processed in a later step as adding -G into the bucket instead of G +// (computing -G is cheap, and this saves us half of the buckets in the MultiExp or BatchScalarMul) +// scalarsMont indicates wheter the provided scalars are in montgomery form +// returns smallValues, which represent the number of scalars which meets the following condition +// 0 < scalar < 2^c (in other words, scalars where only the c-least significant bits are non zero) +func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks int) ([]fr.Element, int) { + toReturn := make([]fr.Element, len(scalars)) + + // number of c-bit radixes in a scalar + nbChunks := fr.Limbs * 64 / c + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) // msb of the c-bit window + max := int(1 << (c - 1)) // max value we want for our digits + cDivides64 := (64 % c) == 0 // if c doesn't divide 64, we may need to select over multiple words + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := uint64(0); chunk < nbChunks; chunk++ { + jc := uint64(chunk * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = !cDivides64 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + + // for each chunk, we could track the number of non-zeros points we will need to process + // this way, if a chunk has more work to do than others, we can spawn off more go routines + // (at the cost of more buckets allocated) + // a simplified approach is to track the small values where only the first word is set + // if this number represent a significant number of points, then we will split first chunk + // 
processing in the msm in 2, to ensure all go routines finish at ~same time + // /!\ nbTasks is enough as parallel.Execute is not going to spawn more than nbTasks go routine + // if it does, though, this will deadlocK. + chSmallValues := make(chan int, nbTasks) + + parallel.Execute(len(scalars), func(start, end int) { + smallValues := 0 + for i := start; i < end; i++ { + var carry int + + scalar := scalars[i] + if scalarsMont { + scalar.FromMont() + } + if scalar.IsUint64() { + // everything is 0, no need to process this scalar + if scalar[0] == 0 { + continue + } + // low c-bits are 1 in mask + if scalar[0]&mask == scalar[0] { + smallValues++ + } + } + + // for each chunk in the scalar, compute the current digit, and an eventual carry + for chunk := uint64(0); chunk < nbChunks; chunk++ { + s := selectors[chunk] + + // init with carry if any + digit := carry + carry = 0 + + // digit = value of the c-bit window + digit += int((scalar[s.index] & s.mask) >> s.shift) + + if s.multiWordSelect { + // we are selecting bits over 2 words + digit += int(scalar[s.index+1]&s.maskHigh) << s.shiftHigh + } + + // if digit is zero, no impact on result + if digit == 0 { + continue + } + + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. 
+ if digit >= max { + digit -= (1 << c) + carry = 1 + } + + var bits uint64 + if digit >= 0 { + bits = uint64(digit) + } else { + bits = uint64(-digit-1) | msbWindow + } + + toReturn[i][s.index] |= (bits << s.shift) + if s.multiWordSelect { + toReturn[i][s.index+1] |= (bits >> s.shiftHigh) + } + + } + } + + chSmallValues <- smallValues + + }, nbTasks) + + // aggregate small values + close(chSmallValues) + smallValues := 0 + for o := range chSmallValues { + smallValues += o + } + return toReturn, smallValues +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Affine) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Affine, error) { + var _p G1Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Jac, error) { + // note: + // each of the msmCX method is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoritical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point don't. + + // for each msmCX + // step 1 + // we compute, for each scalars over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. 
+ // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets base on their selector and return the weighted bucket sum in given channel + // step 3 + // reduce the buckets weigthed sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 8, 16} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. 
+ // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG1Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G1Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG1Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG1Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG1Jac(p *G1Jac, c int, points []G1Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG1Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG1Affine(p *G1Jac, c int, chChunks []chan g1JacExtended) *G1Jac { + var _p g1JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := <-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG1Affine(chunk uint64, + chRes chan<- g1JacExtended, + buckets []g1JacExtended, + c uint64, + points []G1Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << 
nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... + n*bucket[n-1] + + var runningSum, total g1JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G1Jac) msmC4(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + 
chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC5(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 384, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], 
chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC8(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC16(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is 
an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Affine) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Affine, error) { + var _p G2Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Jac, error) { + // note: + // each of the msmCX method is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoritical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word 
size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point don't. + + // for each msmCX + // step 1 + // we compute, for each scalars over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. + // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets base on their selector and return the weighted bucket sum in given channel + // step 3 + // reduce the buckets weigthed sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 8, 16} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. 
+ // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. + // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG2Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G2Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG2Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG2Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG2Jac(p *G2Jac, c int, points []G2Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG2Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG2Affine(p *G2Jac, c int, chChunks []chan g2JacExtended) *G2Jac { + var _p g2JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := <-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG2Affine(chunk uint64, + chRes chan<- g2JacExtended, + buckets []g2JacExtended, + c uint64, + points []G2Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << 
nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... + n*bucket[n-1] + + var runningSum, total g2JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G2Jac) msmC4(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + 
chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC5(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 384, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], 
chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC8(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC16(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is 
an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} diff --git a/ecc/bw6-756/multiexp_test.go b/ecc/bw6-756/multiexp_test.go new file mode 100644 index 000000000..c95bfed83 --- /dev/null +++ b/ecc/bw6-756/multiexp_test.go @@ -0,0 +1,701 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "math/bits" + "runtime" + "sync" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestMultiExpG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G1Affine + var g G1Jac + g.Set(&g1Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same result as a non-splitted one.. + properties.Property("[G1] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G1Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G1] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G1Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G1Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G1] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G1] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G1Jac + g.Set(&g1Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G1Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + var op1MultiExp G1Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G1Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g1GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG1(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var t1, t2, t3 G1Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} + +func TestMultiExpG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G2Affine + var g G2Jac + g.Set(&g2Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same 
result as a non-splitted one.. + properties.Property("[G2] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G2Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G2] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G2Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G2Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G2] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G2] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G2Jac + g.Set(&g2Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G2Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + var op1MultiExp G2Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G2Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g2GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG2(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + 
for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g2GenAff + } + + var t1, t2, t3 G2Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} diff --git a/ecc/bw6-756/pairing.go b/ecc/bw6-756/pairing.go new file mode 100644 index 000000000..d1401819d --- /dev/null +++ b/ecc/bw6-756/pairing.go @@ -0,0 +1,366 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bw6756 + +import ( + "errors" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "github.com/consensys/gnark-crypto/ecc/bw6-756/internal/fptower" +) + +// GT target group of the pairing +type GT = fptower.E6 + +type lineEvaluation struct { + r0 fp.Element + r1 fp.Element + r2 fp.Element +} + +// Pair calculates the reduced pairing for a set of points +func Pair(P []G1Affine, Q []G2Affine) (GT, error) { + f, err := MillerLoop(P, Q) + if err != nil { + return GT{}, err + } + return FinalExponentiation(&f), nil +} + +// PairingCheck calculates the reduced pairing for a set of points and returns True if the result is One +func PairingCheck(P []G1Affine, Q []G2Affine) (bool, error) { + f, err := Pair(P, Q) + if err != nil { + return false, err + } + var one GT + one.SetOne() + return f.Equal(&one), nil +} + +// FinalExponentiation computes the final expo x**(c*(p**3-1)(p+1)(p**2-p+1)/r) +func FinalExponentiation(z *GT, _z ...*GT) GT { + + var result GT + result.Set(z) + + for _, e := range _z { + result.Mul(&result, e) + } + + var buf GT + + // easy part exponent: (p**3 - 1)*(p+1) + buf.Conjugate(&result) + result.Inverse(&result) + buf.Mul(&buf, &result) + result.Frobenius(&buf). + Mul(&result, &buf) + + // hard part exponent: 12(u+1)(p**2 - p + 1)/r + var m1, _m1, m2, _m2, m3, f0, f0_36, g0, g1, _g1, g2, g3, _g3, g4, _g4, g5, _g5, g6, gA, gB, g034, _g1g2, gC, h1, h2, h2g2C, h4 GT + m1.Expt(&result) + _m1.Conjugate(&m1) + m2.Expt(&m1) + _m2.Conjugate(&m2) + m3.Expt(&m2) + f0.Frobenius(&result). + Mul(&f0, &result). + Mul(&f0, &m2) + m2.CyclotomicSquare(&_m1) + f0.Mul(&f0, &m2) + f0_36.CyclotomicSquare(&f0). + CyclotomicSquare(&f0_36). + CyclotomicSquare(&f0_36). + Mul(&f0_36, &f0). + CyclotomicSquare(&f0_36). + CyclotomicSquare(&f0_36) + g0.Mul(&result, &m1). + Frobenius(&g0). + Mul(&g0, &m3). + Mul(&g0, &_m2). 
+ Mul(&g0, &_m1) + g1.Expt(&g0) + _g1.Conjugate(&g1) + g2.Expt(&g1) + g3.Expt(&g2) + _g3.Conjugate(&g3) + g4.Expt(&g3) + _g4.Conjugate(&g4) + g5.Expt(&g4) + _g5.Conjugate(&g5) + g6.Expt(&g5) + gA.Mul(&g3, &_g5). + CyclotomicSquare(&gA). + Mul(&gA, &g6). + Mul(&gA, &g1). + Mul(&gA, &g0) + g034.Mul(&g0, &g3). + Mul(&g034, &_g4) + gB.CyclotomicSquare(&g034). + Mul(&gB, &g034). + Mul(&gB, &g5). + Mul(&gB, &_g1) + _g1g2.Mul(&_g1, &g2) + gC.Mul(&_g3, &_g1g2). + CyclotomicSquare(&gC). + Mul(&gC, &_g1g2). + Mul(&gC, &g0). + CyclotomicSquare(&gC). + Mul(&gC, &g2). + Mul(&gC, &g0). + Mul(&gC, &g4) + // ht, hy = -1, -1 + // c1 = ht**2+3*hy**2 = 4 + h1.CyclotomicSquare(&gA). + CyclotomicSquare(&h1) + // c2 = ht+hy = -2 + h2.CyclotomicSquare(&gB). + Conjugate(&h2) + h2g2C.CyclotomicSquare(&gC). + Mul(&h2g2C, &h2) + h4.CyclotomicSquare(&h2g2C). + Mul(&h4, &h2g2C). + CyclotomicSquare(&h4) + result.Mul(&h1, &h4). + Mul(&result, &f0_36) + + return result +} + +// MillerLoop Optimal Tate alternative (or twisted ate or Eta revisited) +// Alg.2 in https://eprint.iacr.org/2021/1359.pdf +func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { + // check input size match + n := len(P) + if n == 0 || n != len(Q) { + return GT{}, errors.New("invalid inputs sizes") + } + + // filter infinity points + p0 := make([]G1Affine, 0, n) + q := make([]G2Affine, 0, n) + + for k := 0; k < n; k++ { + if P[k].IsInfinity() || Q[k].IsInfinity() { + continue + } + p0 = append(p0, P[k]) + q = append(q, Q[k]) + } + + n = len(q) + + // precomputations + pProj1 := make([]g1Proj, n) + p1 := make([]G1Affine, n) + p01 := make([]G1Affine, n) + p10 := make([]G1Affine, n) + pProj01 := make([]g1Proj, n) // P0+P1 + pProj10 := make([]g1Proj, n) // P0-P1 + l01 := make([]lineEvaluation, n) + l10 := make([]lineEvaluation, n) + for k := 0; k < n; k++ { + p1[k].Y.Neg(&p0[k].Y) + p1[k].X.Mul(&p0[k].X, &thirdRootOneG2) + pProj1[k].FromAffine(&p1[k]) + + // l_{p0,p1}(q) + pProj01[k].Set(&pProj1[k]) + 
pProj01[k].AddMixedStep(&l01[k], &p0[k]) + l01[k].r1.Mul(&l01[k].r1, &q[k].X) + l01[k].r0.Mul(&l01[k].r0, &q[k].Y) + + // l_{p0,-p1}(q) + pProj10[k].Neg(&pProj1[k]) + pProj10[k].AddMixedStep(&l10[k], &p0[k]) + l10[k].r1.Mul(&l10[k].r1, &q[k].X) + l10[k].r0.Mul(&l10[k].r0, &q[k].Y) + } + BatchProjectiveToAffineG1(pProj01, p01) + BatchProjectiveToAffineG1(pProj10, p10) + + // f_{a0+lambda*a1,P}(Q) + var result, ss GT + result.SetOne() + var l, l0 lineEvaluation + + var j int8 + + // i = 189 + for k := 0; k < n; k++ { + pProj1[k].DoubleStep(&l0) + l0.r1.Mul(&l0.r1, &q[k].X) + l0.r0.Mul(&l0.r0, &q[k].Y) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2) + } + + var tmp G1Affine + for i := 188; i >= 0; i-- { + result.Square(&result) + + j = loopCounter1[i]*3 + loopCounter0[i] + + for k := 0; k < n; k++ { + pProj1[k].DoubleStep(&l0) + l0.r1.Mul(&l0.r1, &q[k].X) + l0.r0.Mul(&l0.r0, &q[k].Y) + + switch j { + case -4: + tmp.Neg(&p01[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). + Mul(&result, &ss) + case -3: + tmp.Neg(&p1[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case -2: + pProj1[k].AddMixedStep(&l, &p10[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). 
+ Mul(&result, &ss) + case -1: + tmp.Neg(&p0[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case 0: + result.MulBy034(&l0.r0, &l0.r1, &l0.r2) + case 1: + pProj1[k].AddMixedStep(&l, &p0[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case 2: + tmp.Neg(&p10[k]) + pProj1[k].AddMixedStep(&l, &tmp) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). + Mul(&result, &ss) + case 3: + pProj1[k].AddMixedStep(&l, &p1[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l0.r0, &l0.r1, &l0.r2) + result.Mul(&result, &ss) + case 4: + pProj1[k].AddMixedStep(&l, &p01[k]) + l.r1.Mul(&l.r1, &q[k].X) + l.r0.Mul(&l.r0, &q[k].Y) + ss.Mul034By034(&l.r0, &l.r1, &l.r2, &l01[k].r0, &l01[k].r1, &l01[k].r2) + result.MulBy034(&l0.r0, &l0.r1, &l0.r2). + Mul(&result, &ss) + default: + return GT{}, errors.New("invalid loopCounter") + } + } + } + + return result, nil +} + +// DoubleStep doubles a point in Homogenous projective coordinates, and evaluates the line in Miller loop +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g1Proj) DoubleStep(evaluations *lineEvaluation) { + + // get some Element from our pool + var t1, A, B, C, D, E, EE, F, G, H, I, J, K fp.Element + A.Mul(&p.x, &p.y) + A.Halve() + B.Square(&p.y) + C.Square(&p.z) + D.Double(&C). + Add(&D, &C) + // E.Mul(&D, &bCurveCoeff) + E.Set(&D) + F.Double(&E). + Add(&F, &E) + G.Add(&B, &F) + G.Halve() + H.Add(&p.y, &p.z). + Square(&H) + t1.Add(&B, &C) + H.Sub(&H, &t1) + I.Sub(&E, &B) + J.Square(&p.x) + EE.Square(&E) + K.Double(&EE). + Add(&K, &EE) + + // X, Y, Z + p.x.Sub(&B, &F). + Mul(&p.x, &A) + p.y.Square(&G). 
+ Sub(&p.y, &K) + p.z.Mul(&B, &H) + + // Line evaluation + evaluations.r0.Neg(&H) + evaluations.r1.Double(&J). + Add(&evaluations.r1, &J) + evaluations.r2.Set(&I) +} + +// AddMixedStep point addition in Mixed Homogenous projective and Affine coordinates +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g1Proj) AddMixedStep(evaluations *lineEvaluation, a *G1Affine) { + + // get some Element from our pool + var Y2Z1, X2Z1, O, L, C, D, E, F, G, H, t0, t1, t2, J fp.Element + Y2Z1.Mul(&a.Y, &p.z) + O.Sub(&p.y, &Y2Z1) + X2Z1.Mul(&a.X, &p.z) + L.Sub(&p.x, &X2Z1) + C.Square(&O) + D.Square(&L) + E.Mul(&L, &D) + F.Mul(&p.z, &C) + G.Mul(&p.x, &D) + t0.Double(&G) + H.Add(&E, &F). + Sub(&H, &t0) + t1.Mul(&p.y, &E) + + // X, Y, Z + p.x.Mul(&L, &H) + p.y.Sub(&G, &H). + Mul(&p.y, &O). + Sub(&p.y, &t1) + p.z.Mul(&E, &p.z) + + t2.Mul(&L, &a.Y) + J.Mul(&a.X, &O). + Sub(&J, &t2) + + // Line evaluation + evaluations.r0.Set(&L) + evaluations.r1.Neg(&O) + evaluations.r2.Set(&J) +} diff --git a/ecc/bw6-756/pairing_test.go b/ecc/bw6-756/pairing_test.go new file mode 100644 index 000000000..7db065814 --- /dev/null +++ b/ecc/bw6-756/pairing_test.go @@ -0,0 +1,306 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestPairing(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + + genR1 := GenFr() + genR2 := GenFr() + + properties.Property("[BW6-756] Having the receiver as operand (final expo) should output the same result", prop.ForAll( + func(a GT) bool { + b := a + b = FinalExponentiation(&a) + a = FinalExponentiation(&a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BW6-756] Exponentiating FinalExpo(a) to r should output 1", prop.ForAll( + func(a GT) bool { + b := FinalExponentiation(&a) + return !a.IsInSubGroup() && b.IsInSubGroup() + }, + genA, + )) + + properties.Property("[BW6-756] Expt(Expt) and Exp(t^2) should output the same result in the cyclotomic subgroup", prop.ForAll( + func(a GT) bool { + var b, c, d GT + b.Conjugate(&a) + a.Inverse(&a) + b.Mul(&b, &a) + + a.Frobenius(&b). 
+ Mul(&a, &b) + + c.Expt(&a).Expt(&c) + d.Exp(&a, xGen).Exp(&d, xGen) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BW6-756] bilinearity", prop.ForAll( + func(a, b fr.Element) bool { + + var res, resa, resb, resab, zero GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint, ab big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + ab.Mul(&abigint, &bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + res, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + resa, _ = Pair([]G1Affine{ag1}, []G2Affine{g2GenAff}) + resb, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{bg2}) + + resab.Exp(&res, ab) + resa.Exp(&resa, bbigint) + resb.Exp(&resb, abigint) + + return resab.Equal(&resa) && resab.Equal(&resb) && !res.Equal(&zero) + + }, + genR1, + genR2, + )) + + properties.Property("[BW6-756] MillerLoop of pairs should be equal to the product of MillerLoops", prop.ForAll( + func(a, b fr.Element) bool { + + var simpleProd, factorizedProd GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + P0 := []G1Affine{g1GenAff} + P1 := []G1Affine{ag1} + Q0 := []G2Affine{g2GenAff} + Q1 := []G2Affine{bg2} + + // FE( ML(a,b) * ML(c,d) * ML(e,f) * ML(g,h) ) + M1, _ := MillerLoop(P0, Q0) + M2, _ := MillerLoop(P1, Q0) + M3, _ := MillerLoop(P0, Q1) + M4, _ := MillerLoop(P1, Q1) + simpleProd.Mul(&M1, &M2).Mul(&simpleProd, &M3).Mul(&simpleProd, &M4) + simpleProd = FinalExponentiation(&simpleProd) + + tabP := []G1Affine{g1GenAff, ag1, g1GenAff, ag1} + tabQ := []G2Affine{g2GenAff, g2GenAff, bg2, bg2} + + // FE( ML([a,c,e,g] ; [b,d,f,h]) ) -> saves 3 squares in Fqk + factorizedProd, _ = Pair(tabP, tabQ) + + return simpleProd.Equal(&factorizedProd) + }, + genR1, + genR2, + )) + + properties.Property("[BW6-756] 
PairingCheck", prop.ForAll( + func(a, b fr.Element) bool { + + var g1GenAffNeg G1Affine + g1GenAffNeg.Neg(&g1GenAff) + tabP := []G1Affine{g1GenAff, g1GenAffNeg} + tabQ := []G2Affine{g2GenAff, g2GenAff} + + res, _ := PairingCheck(tabP, tabQ) + + return res + }, + genR1, + genR2, + )) + + properties.Property("[BW6-756] MillerLoop should skip pairs with a point at infinity", prop.ForAll( + func(a, b fr.Element) bool { + + var one GT + + var ag1, g1Inf G1Affine + var bg2, g2Inf G2Affine + + var abigint, bbigint big.Int + + one.SetOne() + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + g1Inf.FromJacobian(&g1Infinity) + g2Inf.FromJacobian(&g2Infinity) + + // e([0,c] ; [b,d]) + tabP := []G1Affine{g1Inf, ag1} + tabQ := []G2Affine{g2GenAff, bg2} + res1, _ := Pair(tabP, tabQ) + + // e([a,c] ; [0,d]) + tabP = []G1Affine{g1GenAff, ag1} + tabQ = []G2Affine{g2Inf, bg2} + res2, _ := Pair(tabP, tabQ) + + // e([0,c] ; [d,0]) + tabP = []G1Affine{g1Inf, ag1} + tabQ = []G2Affine{bg2, g2Inf} + res3, _ := Pair(tabP, tabQ) + + return res1.Equal(&res2) && !res2.Equal(&res3) && res3.Equal(&one) + }, + genR1, + genR2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkPairing(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkMillerLoop(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkFinalExponentiation(b *testing.B) { + + var a GT + a.SetRandom() + + 
b.ResetTimer() + for i := 0; i < b.N; i++ { + FinalExponentiation(&a) + } + +} + +func BenchmarkMultiMiller(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop(P, Q) + } + }) + } +} + +func BenchmarkMultiPair(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair(P, Q) + } + }) + } +} diff --git a/ecc/ecc.go b/ecc/ecc.go index cea2fb10d..3615cf261 100644 --- a/ecc/ecc.go +++ b/ecc/ecc.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). +// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315, bw6-633, BLS12-378 and BW6-756 elliptic curves implementation (+pairing). 
// // Also // @@ -45,11 +45,12 @@ const ( BLS24_315 BW6_761 BW6_633 + BW6_756 ) // Implemented return the list of curves fully implemented in gnark-crypto func Implemented() []ID { - return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315, BW6_633, BLS12_378} + return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315, BW6_633, BLS12_378, BW6_756} } func (id ID) String() string { @@ -69,6 +70,8 @@ func (id ID) String() string { return "bw6_633" case BLS24_315: return "bls24_315" + case BW6_756: + return "bw6_756" default: panic("unimplemented ecc ID") } @@ -93,6 +96,8 @@ func (id ID) Info() Info { return newInfo(&config.BW6_633) case BLS24_315: return newInfo(&config.BLS24_315) + case BW6_756: + return newInfo(&config.BW6_756) default: panic("unimplemented ecc ID") } diff --git a/ecc/ecc.md b/ecc/ecc.md index b394e4d72..9a6bfab85 100644 --- a/ecc/ecc.md +++ b/ecc/ecc.md @@ -6,6 +6,8 @@ * BW6-761 (EC supporting pairing on BLS12-377 field of definition) * BLS24-315 * BW6-633 (EC supporting pairing on BLS24-315 field of definition) +* BLS12-378 (GT-strong SNARK-friendly) +* BW6-756 (EC supporting pairing on BLS12-378 field of definition) ### Twisted edwards curves diff --git a/internal/generator/addchain/1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 b/internal/generator/addchain/1eed5b76b77315c55824fc3c6ad19eb92f19a5f5859d13f7e464428aa2c74d998d5ce788548db3d6059025d409f55414fd63967a0dcc8dc5259a2bdb6c8d4a860554784b1bcfbda16d0bd0d0a49d80678fcc7f0d0 new file mode 100644 index 0000000000000000000000000000000000000000..6833575420e8c8195f285e34aebf84e69474299f GIT binary patch literal 6633 zcmXBY4`kKv9>DSEIe&I%l9^0*W+q7{lVmcw>y(bl5RJXBuSDa zNs=U)BuSDaNs=Tt-*Ql3AUQv3XW%Q=_+Ic3W675~VUl=DFBDb@Zy7hhK^x1~}kl`5&!ka9zA^mR?P z1-6B@X8)l@EwZ)R7Telv?Y0hEr)`OCsco5Uxvk5#!q#nDXg`l(*;YDUavzlqd2;$~}2U$~*JUly~J_DNp9fl&A7k%F}r| 
z<=uIA$}@Q;<<)sl%6s$Pl=tO*Deuqw;|V?x%kx}3xvljleKya}#=kopD~=zj;d-n%=3>QhBUT(YW5sbRRvfou#c?NA9Cu@-yeQA#i{XB(I3C1` z<6*3nD|!A=43A^Q@g!CpPh-XLELI%PW5w|zRva&5#qlau9Is=g+?40v#PBv&9PeVq z@jg}@A7aJvF;*O(V#V<}RvceqrMxcBe~sZ=tT?{MisMJDIDW>8<5#RWe#eUAPpmj< zu~Kf%wVYCHB}4_$fM`TCAr>GOBK$ecq6M#!n)gR7p86mbBiaz{hz?Tmhjp5lAeJJQ zA(kV$5GxSfh?R&{q~cg@?m?_U^di#CgO8#6`p<#AU=4Qt`^YYQBcJ zj+jH-K-@&!Lfj@5uiQK4yNG*;`-lgKhlodr$A~A0r=;R#{>=Ow@dEJ@@e1)8@do#a zT7GMOhj@?pfcQxLUH{4a8Sw@274Z%69q|M46Y+~wykCEt{~(HgB1#2Y2@yZgxWC5M zfM~=;y;f>6FF-6rG$VXXi&%tcMJy({Se4q$?T8LUCt?X=DPkF7Iid^K{#t2;xf`(( zu?n#o(Suk+{Soz=*CN&-`Vi|88xZ}7jU<<|(tvprVl!e8F+_0%Ee)GT5L*zVh%v-g z#5Tlse6FaK#?2Fm9f+NXU5H7<6k-~&o8mH4nlbM|>_zND>_;3x97G&K%pwj`T$xKp z%tsN&5XTWG5GN6*5U25(qgFa&K8rYqIFGo1xQMt!{h?hpUqM_&Tti$(%pq=2|GC~Y z-$L9*+(Fz$+(X<)JU~1|Jfi-ReQbV$c#3$2c#e31c!_w0cun!bKr|wn5DO3s5zUAe z#3Dp1Vll-HBz{SVZbx(=I!UEkd5LK$Vi{sNBCajvF0lg9jaZ3Tg;U^Mc8a@PdE7jK*nx=a zOL?c*g_uN4QU89Y&ASmZh&_nCh<%9t6mO040rNq`A;c`=FyaW}DB>96IN}82B;pjw z=hX6P^BKfh#5u%y#08R%!R3qQONh&eD~PLzYt&zk*UfW?8;F~TTZr3;JBYi8dx-lK zm&Nh}^FzcV#AC!0#8bpG#B+-4WBG;oCE^w0HR27$t*iXj{0{LR5m%S;2k{Z{3Go^6 z1@RT}4e=fE1Hb0h%0JD&5Wf+B5H%ty1zQPGq496IN}82B;pj}G{yC>a>jfX zaSm}FaRG4=aS3sm;=)+DV!n#FhPaNHL)<{zq`2o+ZkcZ*?jY_W?ji0Y9v~hf9w8nh zo*S;s@d< z;uqpK;t!%maqq6Cn5!j31<`?BxZ$R`THX;TPn-H53 zgNPx-Fk*z{Z;z^5%yD(8j*2nFR_fovHuHAGIAQ{^1F;ja3o(h9LQEreQ~dQ#b;i60 z5!aXMUa=3cA8`P25OIiPTJ zsu#_d5SI~G5LXe`5Z4iNh#M3i6RS7Pw-C1xcPK7A)w||(V(T?aqbRw1@mLirR UmQ#FQXy`JpKy)KkB36<955X2p+5i9m literal 0 HcmV?d00001 diff --git a/internal/generator/addchain/7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 b/internal/generator/addchain/7bb56ddaddcc57156093f0f1ab467ae4bc6697d616744fdf91910a2a8b1d366635739e215236cf581640975027d55053f58e59e8373237149668af6db2352a181551e12c6f3ef685b42f43429276019e3f31fc34200000000000000000000 new file mode 100644 index 
0000000000000000000000000000000000000000..d790b3f46ea5711f6eeb322c90ad8e05d6a8c1e5 GIT binary patch literal 7376 zcmXBY4`kKv9>DSEIe&JOWG0iDBuSDaGc%b<(oJ?}W|EoRncd8OyE92v`Uv-*4rdvva=ZY|ZokE#>^T-syk;Yxls-#jw$_;seuUlwa zWLs=&^go)^5?ixvsjbD2gD%Kf=N<$*kq@|wIO z<(+wF%7b|@<)J*3@^Btbc_fddJeo&S9?N4Xx943ckLU4}C-OwflX)`bsXUePbe>Lm zcix@yp1ddJw!AmxnLLy7Y@SVdF3-gSyf2o=xp;7!>ksRB;omg?)jg|6}Jby2S`?2D95G#&{ zu~M$&`A0E4jupp~SaCd!700t!aXgO|$BS5Tyo?pct5|Woj+OGlJpU$!x3S`Q7b}kU zvEuj;D~^w`;`kIRj?b~;_!2ASt~~!WhHtUr_#P{cAF<;287q!ovEuk0D~><0;;6++ zxiQyrO0ks?6+{DK0b(Iy5n?gIztbq1@EoamebnNi4`L~z1<{IFMk@ZX<>nQLm55b{ z)rd8SwTN|y^@t6m;%GCsBRUYBh>eIYL^t(2vdO#|u?4Xeu??}ERNU1bb1$L~(T^BF z>_F^93?ha|#T^?qk03@7V~Aadal`~-5;29CrvA9>Ht#{~Ma&>(5p#%ri2bDEdvw5j z5OD}`7;yw~6mg7HJnYBKClDtQr|=4^<M6zh|8qnnR~^26>$x5 z9dQG36LAZ1n^Zh=@0jl*?ji0Y9v~hf9w8nho*yo`rG^mQT&N06>KF$d_&{?HMR!C z0$kKsu>!FYu?n#ou?E-vT4}9$9b!FV z1ELMlj_9C%k2=j85nYIG#3saM#1_O>lFM0Xn|V8;2hoe@qqu^W`ppA~9f+NXLBtSZ z7%_s+6}8f+c?_`&F^-r(Od_Tb(}>*^mzmNY^IpUZViqxn*oWAUIDj~aI7D$}E*&-> zK^#RKLmWq(K%7LJ!e@?J>9qL_;w<7E;ymI4;v)6CcFBAhaRqS|aSd@DafABz^``k2 z;x^(A;x6JI;y&U5;vwP@^{4D(^Ap5V#52Tm#0$hr#4E&WiWhh3jrlF&9pXLW1L7m% z6ZNOkXY&`tSHw5Ocf=3GPsA_8Z;H2z(jRk;h;qSJqIkO~SIiBF1&D=+MTo_SMnn^0 z38ERXl;RDf++uD;EJG|Om1^Y`rj>|Qh}DR=wv^Y1wTN|y^@t6KHbgt31JOzG4qV=7 z?m~1UHX$}6wjj1ryo$=(%-az?h+ae=q8~AU*n!xI7(@&qh7lta?{ei)^B7_mBCapx zaWR3IL`+e?z0>C1h&_nCh#ABzVvgdaQQl|Xk2ruhh&Y5ej5vZgia3TijyQohN%A?h ze9C+paRzY~aSm~wT<6kAl@S0A>JcCAU+~KQT(7(`E34z_=@<3_>TC2_=)(1 z_>K63s8PIkS5wT@5~6}=KrBEkL@c8CXjWZpZbUR8mLQrDODXPAwZ+_uScX`RSbb(48BVhds`VjE&Rq6g87 z=tJ})21tH;RNY~Yt4npK7(@(FzX`+U5yU8B46zF_j+j79BBl`2h}{&w-l^^}??uG* zr8*;K5p#%ri2aBIq`W+igXTks!-yl)@5oW}F~o7i3B*anDa2{S8N^w{ImCI2x0vb$ z^F_oZ#AU=4#8t#K#C60CijRrao90`H+lV_97oO@}^F73U!~?`b#3RIG#1q6*#52Tm z#0$hr#4E&WiqAOJH|Dp9cZm0h4~UP5PZU1`RzI7+Aig5LA-*GiP=D?HH2*^UM*Kn4 zC?24O6mvrfQ9(2y79bWP79kcR8WBzSopY^WiMg5hk*8s)sRhxBScX`RSb_hBF96%gI96}sM z96=mK977yOoIspJoI;#NoI#vLoI{*PTtHkzTtZw%TtQq#Tti$(+(6t!+(O(&+(Fz$ 
f+(X<)JU~1|JVHE1JV87~JVQK3yg6->12 over fp func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - if conf.Equal(config.BW6_761) || conf.Equal(config.BW6_633) || conf.Equal(config.BLS24_315) { + if conf.Equal(config.BW6_756) || conf.Equal(config.BW6_761) || conf.Equal(config.BW6_633) || conf.Equal(config.BLS24_315) { return nil } From 7fc8a4942cff878884ed8f06097cc65a0d6776d8 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 28 Dec 2021 12:26:34 +0100 Subject: [PATCH 11/29] build: templates for bw6-756 --- ecc/bw6-633/g1.go | 1 + ecc/bw6-633/g2.go | 1 + ecc/bw6-756/fr/fft/fft.go | 3 +- ecc/bw6-756/fr/fft/fuzz.go | 1 + ecc/bw6-756/g1.go | 2 - ecc/bw6-756/g1_test.go | 6 ++- ecc/bw6-756/g2.go | 1 - ecc/bw6-756/g2_test.go | 6 ++- .../crypto/signature/eddsa/generate.go | 4 ++ .../signature/eddsa/template/eddsa.go.tmpl | 8 +-- internal/generator/ecc/template/point.go.tmpl | 54 ++++++++++++++++++- .../ecc/template/tests/marshal.go.tmpl | 2 +- .../ecc/template/tests/point.go.tmpl | 2 +- internal/generator/edwards/generate.go | 4 ++ .../generator/fft/template/domain.go.tmpl | 3 ++ .../generator/fft/template/imports.go.tmpl | 4 ++ .../pairing/template/tests/pairing.go.tmpl | 4 +- 17 files changed, 89 insertions(+), 17 deletions(-) diff --git a/ecc/bw6-633/g1.go b/ecc/bw6-633/g1.go index 17baa6bec..0c9ca84df 100644 --- a/ecc/bw6-633/g1.go +++ b/ecc/bw6-633/g1.go @@ -531,6 +531,7 @@ func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { p.phi(&L1).AddAssign(&L0) return p + } // ------------------------------------------------------------------------------------------------- diff --git a/ecc/bw6-633/g2.go b/ecc/bw6-633/g2.go index c28ec1c5e..831624a1f 100644 --- a/ecc/bw6-633/g2.go +++ b/ecc/bw6-633/g2.go @@ -529,6 +529,7 @@ func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { p.phi(&L1).AddAssign(&L0) return p + } // ------------------------------------------------------------------------------------------------- diff --git 
a/ecc/bw6-756/fr/fft/fft.go b/ecc/bw6-756/fr/fft/fft.go index 503f375ba..290c071a2 100644 --- a/ecc/bw6-756/fr/fft/fft.go +++ b/ecc/bw6-756/fr/fft/fft.go @@ -21,8 +21,9 @@ import ( "runtime" "github.com/consensys/gnark-crypto/ecc" - "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" "github.com/consensys/gnark-crypto/internal/parallel" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" ) // Decimation is used in the FFT call to select decimation in time or in frequency diff --git a/ecc/bw6-756/fr/fft/fuzz.go b/ecc/bw6-756/fr/fft/fuzz.go index 1c35691b5..98552dcf3 100644 --- a/ecc/bw6-756/fr/fft/fuzz.go +++ b/ecc/bw6-756/fr/fft/fuzz.go @@ -23,6 +23,7 @@ import ( "bytes" "fmt" "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" ) diff --git a/ecc/bw6-756/g1.go b/ecc/bw6-756/g1.go index 2a1f0a5bf..f3ac868b6 100644 --- a/ecc/bw6-756/g1.go +++ b/ecc/bw6-756/g1.go @@ -506,7 +506,6 @@ func (p *G1Affine) ClearCofactor(a *G1Affine) *G1Affine { // ClearCofactor maps a point in E(Fp) to E(Fp)[r] func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { - var L0, L1, uP, u2P, u3P, tmp G1Jac uP.ScalarMultiplication(a, &xGen) @@ -533,7 +532,6 @@ func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { AddAssign(&L0) return p - } // ------------------------------------------------------------------------------------------------- diff --git a/ecc/bw6-756/g1_test.go b/ecc/bw6-756/g1_test.go index a38dbeb3a..06d5277a6 100644 --- a/ecc/bw6-756/g1_test.go +++ b/ecc/bw6-756/g1_test.go @@ -38,7 +38,8 @@ func TestG1AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( func(a fp.Element) bool { var p, res1, res2 G1Jac - p = fuzzJacobianG1Affine(&g1Gen, a) + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) res1.phi(&p) res2.mulWindowed(&p, &lambdaGLV) @@ -50,7 +51,8 @@ func TestG1AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( func(a 
fp.Element) bool { var p, res, tmp G1Jac - p = fuzzJacobianG1Affine(&g1Gen, a) + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) tmp.phi(&p) res.phi(&tmp). AddAssign(&tmp). diff --git a/ecc/bw6-756/g2.go b/ecc/bw6-756/g2.go index 934f7d6f1..d7fbb9659 100644 --- a/ecc/bw6-756/g2.go +++ b/ecc/bw6-756/g2.go @@ -524,7 +524,6 @@ func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { AddAssign(&L1) return p - } // ------------------------------------------------------------------------------------------------- diff --git a/ecc/bw6-756/g2_test.go b/ecc/bw6-756/g2_test.go index c2fe39936..b068b6d3a 100644 --- a/ecc/bw6-756/g2_test.go +++ b/ecc/bw6-756/g2_test.go @@ -38,7 +38,8 @@ func TestG2AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi(P) = lambdaGLV * P", prop.ForAll( func(a fp.Element) bool { var p, res1, res2 G2Jac - p = fuzzJacobianG2Affine(&g2Gen, a) + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) res1.phi(&p) res2.mulWindowed(&p, &lambdaGLV) @@ -50,7 +51,8 @@ func TestG2AffineEndomorphism(t *testing.T) { properties.Property("[BW6-756] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( func(a fp.Element) bool { var p, res, tmp G2Jac - p = fuzzJacobianG2Affine(&g2Gen, a) + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) tmp.phi(&p) res.phi(&tmp). AddAssign(&tmp). 
diff --git a/internal/generator/crypto/signature/eddsa/generate.go b/internal/generator/crypto/signature/eddsa/generate.go index 1422a4137..dd4f82519 100644 --- a/internal/generator/crypto/signature/eddsa/generate.go +++ b/internal/generator/crypto/signature/eddsa/generate.go @@ -8,6 +8,10 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { + if conf.Equal(config.BW6_756) { + return nil + } + // eddsa conf.Package = "eddsa" entries := []bavard.Entry{ diff --git a/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl b/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl index da28a4b4e..7d1262c0e 100644 --- a/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl +++ b/internal/generator/crypto/signature/eddsa/template/eddsa.go.tmpl @@ -52,7 +52,7 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { var pub PublicKey var priv PrivateKey - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} // The source of randomness and the secret scalar must come // from 2 distincts sources. 
Since the scalar is the size of the // field of definition (48 bytes), the scalar must come from a @@ -87,7 +87,7 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { // prune the key // https://tools.ietf.org/html/rfc8032#section-5.1.5, key generation - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} h1[0] &= 0xF8 h1[sizeFr-1] &= 0x7F h1[sizeFr-1] |= 0x40 @@ -100,14 +100,14 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { // reverse first bytes because setBytes interpret stream as big endian // but in eddsa specs s is the first 32 bytes in little endian for i, j := 0, sizeFr; i < j; i, j = i+1, j-1 { - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} h1[i], h1[j] = h1[j], h1[i] {{ else }} h[i], h[j] = h[j], h[i] {{ end }} } - {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{ if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} copy(priv.scalar[:], h1[:sizeFr]) {{ else }} copy(priv.scalar[:], h[:sizeFr]) diff --git a/internal/generator/ecc/template/point.go.tmpl b/internal/generator/ecc/template/point.go.tmpl index 1c7eae8a3..d536b51a0 100644 --- a/internal/generator/ecc/template/point.go.tmpl +++ b/internal/generator/ecc/template/point.go.tmpl @@ -416,7 +416,7 @@ func (p *{{ $TJacobian }}) IsOnCurve() bool { } {{- end}} -{{else if eq .Name "bw6-761"}} +{{else if or (eq .Name "bw6-761") (eq .Name "bw6-756")}} // IsInSubGroup returns true if p is on the r-torsion, false otherwise. // Z[r,0]+Z[-lambda{{ $TAffine }}, 1] is the kernel // of (u,v)->u+lambda{{ $TAffine }}v mod r. 
Expressing r, lambda{{ $TAffine }} as @@ -749,6 +749,33 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { p.phi(&L1).AddAssign(&L0) return p +{{else if eq .Name "bw6-756"}} + var L0, L1, uP, u2P, u3P, tmp G1Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + + L0.Set(a).AddAssign(&u3P). + SubAssign(&u2P) + tmp.Set(a).AddAssign(&u2P). + SubAssign(&uP). + SubAssign(&uP). + Double(&tmp) + L0.SubAssign(&tmp). + SubAssign(a) + + L1.Set(a).AddAssign(&uP) + tmp.Set(&uP).SubAssign(a). + Double(&tmp). + SubAssign(&u2P) + L1.AddAssign(&tmp). + SubAssign(a) + + p.phi(&L1). + AddAssign(&L0) + + return p {{- end}} } {{ else }} @@ -934,6 +961,31 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { p.phi(&L1).AddAssign(&L0) return p +{{else if eq .Name "bw6-756"}} + + var L0, L1, uP, u2P, u3P, tmp G2Jac + + uP.ScalarMultiplication(a, &xGen) + u2P.ScalarMultiplication(&uP, &xGen) + u3P.ScalarMultiplication(&u2P, &xGen) + // ht=-2, hy=0 + // d1=1, d2=-1, d3=-1 + + L0.Set(a). + AddAssign(&u2P). + SubAssign(&uP) + tmp.Set(&u2P). + AddAssign(a). + SubAssign(&uP). + Double(&tmp) + L1.Set(&u3P). + SubAssign(&tmp) + + p.phi(&L0). 
+ AddAssign(&L1) + + return p + {{- end}} } {{- end}} diff --git a/internal/generator/ecc/template/tests/marshal.go.tmpl b/internal/generator/ecc/template/tests/marshal.go.tmpl index 11ba40f79..51a117544 100644 --- a/internal/generator/ecc/template/tests/marshal.go.tmpl +++ b/internal/generator/ecc/template/tests/marshal.go.tmpl @@ -345,7 +345,7 @@ func GenFp() gopter.Gen { // e2 e4 e12 e24 for bls24 // e2 e6 e12 else */}} -{{if or (eq .Name "bw6-633") (eq .Name "bw6-761")}} +{{if or (eq .Name "bw6-633") (eq .Name "bw6-761") (eq .Name "bw6-756")}} // GenE3 generates an E3 elmt func GenE3() gopter.Gen { return gopter.CombineGens( diff --git a/internal/generator/ecc/template/tests/point.go.tmpl b/internal/generator/ecc/template/tests/point.go.tmpl index 1e9aa3695..3d29df278 100644 --- a/internal/generator/ecc/template/tests/point.go.tmpl +++ b/internal/generator/ecc/template/tests/point.go.tmpl @@ -64,7 +64,7 @@ import ( )) {{if eq .PointName "g2" }} - {{- if and (eq .PointName "g2") (ne .Name "bw6-761") (ne .Name "bw6-633") }} + {{- if and (eq .PointName "g2") (ne .Name "bw6-761") (ne .Name "bw6-633") (ne .Name "bw6-756") }} properties.Property("[{{ toUpper .Name }}] check that psi^2(P) = -phi(P)", prop.ForAll( func(a {{ .CoordType}}) bool { var p, res1, res2 {{ $TJacobian }} diff --git a/internal/generator/edwards/generate.go b/internal/generator/edwards/generate.go index 45f6bf1bd..b2ea5b5e1 100644 --- a/internal/generator/edwards/generate.go +++ b/internal/generator/edwards/generate.go @@ -8,6 +8,10 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { + if conf.Equal(config.BW6_756) { + return nil + } + conf.Package = "twistededwards" entries := []bavard.Entry{ diff --git a/internal/generator/fft/template/domain.go.tmpl b/internal/generator/fft/template/domain.go.tmpl index 269adacb0..73366c30b 100644 --- a/internal/generator/fft/template/domain.go.tmpl +++ b/internal/generator/fft/template/domain.go.tmpl @@ -78,6 
+78,9 @@ func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { {{else if eq .Name "bw6-761"}} rootOfUnity.SetString("32863578547254505029601261939868325669770508939375122462904745766352256812585773382134936404344547323199885654433") const maxOrderRoot uint64 = 46 + {{else if eq .Name "bw6-756"}} + rootOfUnity.SetString("199251335866470442271346949249090720992237796757894062992204115206570647302191425225605716521843542790404563904580") + const maxOrderRoot uint64 = 41 {{else if eq .Name "bw6-633"}} rootOfUnity.SetString("4991787701895089137426454739366935169846548798279261157172811661565882460884369603588700158257") const maxOrderRoot uint64 = 20 diff --git a/internal/generator/fft/template/imports.go.tmpl b/internal/generator/fft/template/imports.go.tmpl index f2b26bcc7..858fe8afe 100644 --- a/internal/generator/fft/template/imports.go.tmpl +++ b/internal/generator/fft/template/imports.go.tmpl @@ -10,6 +10,8 @@ "github.com/consensys/gnark-crypto/ecc/bn254/fr" {{ else if eq .Name "bw6-761"}} "github.com/consensys/gnark-crypto/ecc/bw6-761/fr" +{{ else if eq .Name "bw6-756"}} + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" {{ else if eq .Name "bw6-633"}} "github.com/consensys/gnark-crypto/ecc/bw6-633/fr" {{ else if eq .Name "bls24-315"}} @@ -29,6 +31,8 @@ curve "github.com/consensys/gnark-crypto/ecc/bn254" {{else if eq .Name "bw6-761"}} curve "github.com/consensys/gnark-crypto/ecc/bw6-761" +{{else if eq .Name "bw6-756"}} + curve "github.com/consensys/gnark-crypto/ecc/bw6-756" {{else if eq .Name "bw6-633"}} curve "github.com/consensys/gnark-crypto/ecc/bw6-633" {{ else if eq .Name "bls24-315"}} diff --git a/internal/generator/pairing/template/tests/pairing.go.tmpl b/internal/generator/pairing/template/tests/pairing.go.tmpl index 09a0840a9..b4a47335b 100644 --- a/internal/generator/pairing/template/tests/pairing.go.tmpl +++ b/internal/generator/pairing/template/tests/pairing.go.tmpl @@ -18,7 +18,7 @@ func TestPairing(t *testing.T) { properties := 
gopter.NewProperties(parameters) - {{if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} genA := GenE6() {{else if eq .Name "bls24-315"}} genA := GenE24() @@ -52,7 +52,7 @@ func TestPairing(t *testing.T) { b.Conjugate(&a) a.Inverse(&a) b.Mul(&b, &a) - {{if or (eq .Name "bw6-761") (eq .Name "bw6-633")}} + {{if or (eq .Name "bw6-761") (eq .Name "bw6-633") (eq .Name "bw6-756")}} a.Frobenius(&b). {{else if eq .Name "bls24-315"}} a.FrobeniusQuad(&b). From 12fc5b1920ee623f1a4a95e2a49366e71e393eff Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 31 Dec 2021 15:27:58 +0100 Subject: [PATCH 12/29] feat(bw6-756): add companion twisted Edwards --- README.md | 18 +- ecc/bw6-756/twistededwards/doc.go | 18 + ecc/bw6-756/twistededwards/eddsa/doc.go | 22 + ecc/bw6-756/twistededwards/eddsa/eddsa.go | 274 +++++++++++ .../twistededwards/eddsa/eddsa_test.go | 208 ++++++++ ecc/bw6-756/twistededwards/eddsa/marshal.go | 133 +++++ ecc/bw6-756/twistededwards/point.go | 411 ++++++++++++++++ ecc/bw6-756/twistededwards/twistededwards.go | 63 +++ .../twistededwards/twistededwards_test.go | 456 ++++++++++++++++++ ecc/ecc.go | 2 +- hash/hashes.go | 7 + .../crypto/signature/eddsa/generate.go | 4 - internal/generator/edwards/generate.go | 4 - signature/signature.go | 3 +- 14 files changed, 1604 insertions(+), 19 deletions(-) create mode 100644 ecc/bw6-756/twistededwards/doc.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/doc.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/eddsa.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/eddsa_test.go create mode 100644 ecc/bw6-756/twistededwards/eddsa/marshal.go create mode 100644 ecc/bw6-756/twistededwards/point.go create mode 100644 ecc/bw6-756/twistededwards/twistededwards.go create mode 100644 ecc/bw6-756/twistededwards/twistededwards_test.go diff --git a/README.md b/README.md index c7fadb01d..832b33bb2 100644 --- a/README.md +++ b/README.md @@ -3,17 
+3,17 @@ [![License](https://img.shields.io/badge/license-Apache%202-blue)](LICENSE) [![Go Report Card](https://goreportcard.com/badge/github.com/ConsenSys/gnark-crypto)](https://goreportcard.com/badge/github.com/ConsenSys/gnark-crypto) [![PkgGoDev](https://pkg.go.dev/badge/mod/github.com/consensys/gnark-crypto)](https://pkg.go.dev/mod/github.com/consensys/gnark-crypto) `gnark-crypto` provides: -* [Elliptic curve cryptography](ecc/ecc.md) (+pairing) on BN254, BLS12-381, BLS12-377, BW6-761, BLS24-315 and BW6-633 +* [Elliptic curve cryptography](ecc/ecc.md) (+pairing) on BN254, BLS12-381, BLS12-377, BW6-761, BLS24-315, BW6-633, BLS12-378 and BW6-756 * [Finite field arithmetic](field/field.md) (fast big.Int) * FFT * Polynomial commitment schemes * MiMC * EdDSA (on the "companion" twisted edwards curves) - + `gnark-crypto` is actively developed and maintained by the team (zkteam@consensys.net | [HackMD](https://hackmd.io/@zkteam)) behind: -* [`gnark`: a framework to execute (and verify) algorithms in zero-knowledge](https://github.com/consensys/gnark) +* [`gnark`: a framework to execute (and verify) algorithms in zero-knowledge](https://github.com/consensys/gnark) ## Warning @@ -28,7 +28,7 @@ `gnark-crypto` is tested with the last 2 major releases of Go (1.16 and 1.17). -### Install `gnark-crypto` +### Install `gnark-crypto` ```bash go get github.com/consensys/gnark-crypto @@ -44,27 +44,27 @@ The APIs are consistent accross the curves. For example, [here is `bn254` godoc] ### Development -Most (but not all) of the code is generated from the templates in `internal/generator`. +Most (but not all) of the code is generated from the templates in `internal/generator`. The generated code contains little to no interfaces and is strongly typed with a base field (generated by the `gnark-crypto/field`). The two main factors driving this design choice are: -1. Performance: `gnark-crypto` algorithms manipulates millions (if not billions) of field elements. 
Interface indirection at this level, plus garbage collection indexing takes a heavy toll on perf. +1. Performance: `gnark-crypto` algorithms manipulates millions (if not billions) of field elements. Interface indirection at this level, plus garbage collection indexing takes a heavy toll on perf. 2. No generics in Go: need to derive (mostly) identical code for various moduli and curves, with consistent APIs To regenerate the files, see `internal/generator/main.go`. Run: ``` go generate ./internal/... -``` +``` ## Benchmarks -[Benchmarking pairing-friendly elliptic curves libraries](https://hackmd.io/@zkteam/eccbench) +[Benchmarking pairing-friendly elliptic curves libraries](https://hackmd.io/@zkteam/eccbench) >The libraries are implemented in different languages and some use more assembly code than others. Besides the different algorithmic and software optimizations used across, it should be noted also that some libraries target constant-time implementation for some operations making it de facto slower. However, it can be clear that consensys/gnark-crypto is one of the fastest pairing-friendly elliptic curve libraries to be used in zkp projects with different curves. ## Versioning -We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://github.com/consensys/gnark-crypto/tags). +We use [SemVer](http://semver.org/) for versioning. For the versions available, see the [tags on this repository](https://github.com/consensys/gnark-crypto/tags). ## License diff --git a/ecc/bw6-756/twistededwards/doc.go b/ecc/bw6-756/twistededwards/doc.go new file mode 100644 index 000000000..771de3887 --- /dev/null +++ b/ecc/bw6-756/twistededwards/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package twistededwards provides bw6-756's twisted edwards "companion curve" defined on fr. +package twistededwards diff --git a/ecc/bw6-756/twistededwards/eddsa/doc.go b/ecc/bw6-756/twistededwards/eddsa/doc.go new file mode 100644 index 000000000..65fdfe7af --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/doc.go @@ -0,0 +1,22 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package eddsa provides EdDSA signature scheme on bw6-756's twisted edwards curve. +// +// See also +// +// https://en.wikipedia.org/wiki/EdDSA +package eddsa diff --git a/ecc/bw6-756/twistededwards/eddsa/eddsa.go b/ecc/bw6-756/twistededwards/eddsa/eddsa.go new file mode 100644 index 000000000..f5ca9d161 --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/eddsa.go @@ -0,0 +1,274 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "errors" + "hash" + "io" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/ecc/bw6-756/twistededwards" + "github.com/consensys/gnark-crypto/signature" + "golang.org/x/crypto/blake2b" +) + +var errNotOnCurve = errors.New("point not on curve") + +const ( + sizeFr = fr.Bytes + sizePublicKey = sizeFr + sizeSignature = 2 * sizeFr + sizePrivateKey = 2*sizeFr + 32 +) + +// PublicKey eddsa signature object +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type PublicKey struct { + A twistededwards.PointAffine +} + +// PrivateKey private key of an eddsa instance +type PrivateKey struct { + PublicKey PublicKey // copy of the associated public key + scalar [sizeFr]byte // secret scalar, in big Endian + randSrc [32]byte // source +} + +// Signature represents an eddsa signature +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type Signature struct { + R twistededwards.PointAffine + S [sizeFr]byte +} + +func init() { + signature.Register(signature.EDDSA_BW6_756, GenerateKeyInterfaces) +} + +// GenerateKey generates a public and private key pair. 
+func GenerateKey(r io.Reader) (PrivateKey, error) { + + c := twistededwards.GetEdwardsCurve() + + var pub PublicKey + var priv PrivateKey + + // The source of randomness and the secret scalar must come + // from 2 distincts sources. Since the scalar is the size of the + // field of definition (48 bytes), the scalar must come from a + // different digest so there is no overlap between the source of + // randomness and the scalar. + + // used for random scalar (aka private key) + seed := make([]byte, 32) + _, err := r.Read(seed) + if err != nil { + return priv, err + } + h1 := blake2b.Sum512(seed[:]) + + // used for the source of randomness when hashing the message + h2 := blake2b.Sum512(h1[:]) + for i := 0; i < 32; i++ { + priv.randSrc[i] = h2[i] + } + + // prune the key + // https://tools.ietf.org/html/rfc8032#section-5.1.5, key generation + + h1[0] &= 0xF8 + h1[sizeFr-1] &= 0x7F + h1[sizeFr-1] |= 0x40 + + // reverse first bytes because setBytes interpret stream as big endian + // but in eddsa specs s is the first 32 bytes in little endian + for i, j := 0, sizeFr; i < j; i, j = i+1, j-1 { + + h1[i], h1[j] = h1[j], h1[i] + + } + + copy(priv.scalar[:], h1[:sizeFr]) + + var bscalar big.Int + bscalar.SetBytes(priv.scalar[:]) + pub.A.ScalarMul(&c.Base, &bscalar) + + priv.PublicKey = pub + + return priv, nil +} + +// GenerateKeyInterfaces generate interfaces for the public/private key. +// This purpose of this function is to be registered in the list of signature schemes. +func GenerateKeyInterfaces(r io.Reader) (signature.Signer, error) { + priv, err := GenerateKey(r) + return &priv, err +} + +// Equal compares 2 public keys +func (pub *PublicKey) Equal(other signature.PublicKey) bool { + bpk := pub.Bytes() + bother := other.Bytes() + return subtle.ConstantTimeCompare(bpk, bother) == 1 +} + +// Public returns the public key associated to the private key. +// From Signer interface defined in gnark/crypto/signature. 
+func (privKey *PrivateKey) Public() signature.PublicKey { + var pub PublicKey + pub.A.Set(&privKey.PublicKey.A) + return &pub +} + +// Sign sign a message +// Pure Eddsa version (see https://tools.ietf.org/html/rfc8032#page-8) +func (privKey *PrivateKey) Sign(message []byte, hFunc hash.Hash) ([]byte, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + var res Signature + + // blinding factor for the private key + // blindingFactorBigInt must be the same size as the private key, + // blindingFactorBigInt = h(randomness_source||message)[:sizeFr] + var blindingFactorBigInt big.Int + + // randSrc = privKey.randSrc || msg (-> message = MSB message .. LSB message) + randSrc := make([]byte, 32+len(message)) + for i, v := range privKey.randSrc { + randSrc[i] = v + } + copy(randSrc[32:], message) + + // randBytes = H(randSrc) + blindingFactorBytes := blake2b.Sum512(randSrc[:]) // TODO ensures that the hash used to build the key and the one used here is the same + blindingFactorBigInt.SetBytes(blindingFactorBytes[:sizeFr]) + + // compute R = randScalar*Base + res.R.ScalarMul(&curveParams.Base, &blindingFactorBigInt) + if !res.R.IsOnCurve() { + return nil, errNotOnCurve + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + resRX := res.R.X.Bytes() + resRY := res.R.Y.Bytes() + resAX := privKey.PublicKey.A.X.Bytes() + resAY := privKey.PublicKey.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], resRX[:]) + copy(dataToHash[sizeFr:], resRY[:]) + copy(dataToHash[2*sizeFr:], resAX[:]) + copy(dataToHash[3*sizeFr:], resAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + _, err := hFunc.Write(dataToHash[:]) + if err != nil { + return nil, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // Compute s = randScalarInt + H(R,A,M)*S + // going with big int to do ops mod curve order + var bscalar, bs big.Int + 
bscalar.SetBytes(privKey.scalar[:]) + bs.Mul(&hramInt, &bscalar). + Add(&bs, &blindingFactorBigInt). + Mod(&bs, &curveParams.Order) + sb := bs.Bytes() + if len(sb) < sizeFr { + offset := make([]byte, sizeFr-len(sb)) + sb = append(offset, sb...) + } + copy(res.S[:], sb[:]) + + return res.Bytes(), nil +} + +// Verify verifies an eddsa signature +func (pub *PublicKey) Verify(sigBin, message []byte, hFunc hash.Hash) (bool, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + // verify that pubKey and R are on the curve + if !pub.A.IsOnCurve() { + return false, errNotOnCurve + } + + // Deserialize the signature + var sig Signature + if _, err := sig.SetBytes(sigBin); err != nil { + return false, err + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + sigRX := sig.R.X.Bytes() + sigRY := sig.R.Y.Bytes() + sigAX := pub.A.X.Bytes() + sigAY := pub.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], sigRX[:]) + copy(dataToHash[sizeFr:], sigRY[:]) + copy(dataToHash[2*sizeFr:], sigAX[:]) + copy(dataToHash[3*sizeFr:], sigAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + if _, err := hFunc.Write(dataToHash[:]); err != nil { + return false, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // lhs = cofactor*S*Base + var lhs twistededwards.PointAffine + var bCofactor, bs big.Int + curveParams.Cofactor.ToBigInt(&bCofactor) + bs.SetBytes(sig.S[:]) + lhs.ScalarMul(&curveParams.Base, &bs). + ScalarMul(&lhs, &bCofactor) + + if !lhs.IsOnCurve() { + return false, errNotOnCurve + } + + // rhs = cofactor*(R + H(R,A,M)*A) + var rhs twistededwards.PointAffine + rhs.ScalarMul(&pub.A, &hramInt). + Add(&rhs, &sig.R). 
+ ScalarMul(&rhs, &bCofactor) + if !rhs.IsOnCurve() { + return false, errNotOnCurve + } + + // verifies that cofactor*S*Base=cofactor*(R + H(R,A,M)*A) + if !lhs.X.Equal(&rhs.X) || !lhs.Y.Equal(&rhs.Y) { + return false, nil + } + + return true, nil +} diff --git a/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go b/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go new file mode 100644 index 000000000..7d284bee3 --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/sha256" + "math/rand" + "testing" + + crand "crypto/rand" + + "fmt" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/consensys/gnark-crypto/hash" + "github.com/consensys/gnark-crypto/signature" +) + +func Example() { + // instantiate hash function + hFunc := hash.MIMC_BW6_756.New("seed") + + // create a eddsa key pair + privateKey, _ := signature.EDDSA_BW6_756.New(crand.Reader) + publicKey := privateKey.Public() + + // note that the message is on 4 bytes + msg := []byte{0xde, 0xad, 0xf0, 0x0d} + + // sign the message + signature, _ := privateKey.Sign(msg, hFunc) + + // verifies signature + isValid, _ := publicKey.Verify(signature, msg, hFunc) + if !isValid { + fmt.Println("1. invalid signature") + } else { + fmt.Println("1. valid signature") + } + + // Output: 1. 
valid signature +} + +func TestSerialization(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + privKey1, err := signature.EDDSA_BW6_756.New(r) + if err != nil { + t.Fatal(err) + } + pubKey1 := privKey1.Public() + + privKey2, err := signature.EDDSA_BW6_756.New(r) + if err != nil { + t.Fatal(err) + } + pubKey2 := privKey2.Public() + + pubKeyBin1 := pubKey1.Bytes() + pubKey2.SetBytes(pubKeyBin1) + pubKeyBin2 := pubKey2.Bytes() + if len(pubKeyBin1) != len(pubKeyBin2) { + t.Fatal("Inconistent size") + } + for i := 0; i < len(pubKeyBin1); i++ { + if pubKeyBin1[i] != pubKeyBin2[i] { + t.Fatal("Error serialize(deserialize(.))") + } + } + + privKeyBin1 := privKey1.Bytes() + privKey2.SetBytes(privKeyBin1) + privKeyBin2 := privKey2.Bytes() + if len(privKeyBin1) != len(privKeyBin2) { + t.Fatal("Inconistent size") + } + for i := 0; i < len(privKeyBin1); i++ { + if privKeyBin1[i] != privKeyBin2[i] { + t.Fatal("Error serialize(deserialize(.))") + } + } +} + +func TestEddsaMIMC(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + // create eddsa obj and sign a message + privKey, err := signature.EDDSA_BW6_756.New(r) + if err != nil { + t.Fatal(nil) + } + pubKey := privKey.Public() + hFunc := hash.MIMC_BW6_756.New("seed") + + var frMsg fr.Element + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978") + msgBin := frMsg.Bytes() + signature, err := privKey.Sign(msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + + // verifies correct msg + res, err := pubKey.Verify(signature, msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + if !res { + t.Fatal("Verifiy correct signature should return true") + } + + // verifies wrong msg + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035979") + msgBin = frMsg.Bytes() + res, err = pubKey.Verify(signature, msgBin[:], hFunc) + if err != nil { + t.Fatal(err) + } + if res { + t.Fatal("Verfiy wrong signature should be 
false") + } + +} + +func TestEddsaSHA256(t *testing.T) { + + src := rand.NewSource(0) + r := rand.New(src) + + hFunc := sha256.New() + + // create eddsa obj and sign a message + // create eddsa obj and sign a message + + privKey, err := signature.EDDSA_BW6_756.New(r) + pubKey := privKey.Public() + if err != nil { + t.Fatal(err) + } + + signature, err := privKey.Sign([]byte("message"), hFunc) + if err != nil { + t.Fatal(err) + } + + // verifies correct msg + res, err := pubKey.Verify(signature, []byte("message"), hFunc) + if err != nil { + t.Fatal(err) + } + if !res { + t.Fatal("Verifiy correct signature should return true") + } + + // verifies wrong msg + res, err = pubKey.Verify(signature, []byte("wrong_message"), hFunc) + if err != nil { + t.Fatal(err) + } + if res { + t.Fatal("Verfiy wrong signature should be false") + } + +} + +// benchmarks + +func BenchmarkVerify(b *testing.B) { + + src := rand.NewSource(0) + r := rand.New(src) + + hFunc := hash.MIMC_BW6_756.New("seed") + + // create eddsa obj and sign a message + privKey, err := signature.EDDSA_BW6_756.New(r) + pubKey := privKey.Public() + if err != nil { + b.Fatal(err) + } + var frMsg fr.Element + frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978") + msgBin := frMsg.Bytes() + signature, _ := privKey.Sign(msgBin[:], hFunc) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + pubKey.Verify(signature, msgBin[:], hFunc) + } +} diff --git a/ecc/bw6-756/twistededwards/eddsa/marshal.go b/ecc/bw6-756/twistededwards/eddsa/marshal.go new file mode 100644 index 000000000..c68129087 --- /dev/null +++ b/ecc/bw6-756/twistededwards/eddsa/marshal.go @@ -0,0 +1,133 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "io" +) + +// Bytes returns the binary representation of the public key +// follows https://tools.ietf.org/html/rfc8032#section-3.1 +// and returns a compressed representation of the point (x,y) +// +// x, y are the coordinates of the point +// on the twisted Edwards as big endian integers. +// compressed representation store x with a parity bit to recompute y +func (pk *PublicKey) Bytes() []byte { + var res [sizePublicKey]byte + pkBin := pk.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pkBin[:]) + return res[:] +} + +// SetBytes sets p from binary representation in buf. +// buf represents a public key as x||y where x, y are +// interpreted as big endian binary numbers corresponding +// to the coordinates of a point on the twisted Edwards. +// It returns the number of bytes read from the buffer. +func (pk *PublicKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePublicKey { + return n, io.ErrShortBuffer + } + if _, err := pk.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !pk.A.IsOnCurve() { + return n, errNotOnCurve + } + return n, nil +} + +// Bytes returns the binary representation of pk, +// as byte array publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. 
+func (privKey *PrivateKey) Bytes() []byte {
+	var res [sizePrivateKey]byte
+	pubkBin := privKey.PublicKey.A.Bytes()
+	subtle.ConstantTimeCopy(1, res[:sizeFr], pubkBin[:])
+	subtle.ConstantTimeCopy(1, res[sizeFr:2*sizeFr], privKey.scalar[:])
+	subtle.ConstantTimeCopy(1, res[2*sizeFr:], privKey.randSrc[:])
+	return res[:]
+}
+
+// SetBytes sets privKey from buf, where buf is interpreted
+// as publicKey||scalar||randSrc
+// where publicKey is as publicKey.Bytes(), and
+// scalar is in big endian, of size sizeFr.
+// It returns the number of bytes read.
+func (privKey *PrivateKey) SetBytes(buf []byte) (int, error) {
+	n := 0
+	if len(buf) < sizePrivateKey {
+		return n, io.ErrShortBuffer
+	}
+	if _, err := privKey.PublicKey.A.SetBytes(buf[:sizeFr]); err != nil {
+		return 0, err
+	}
+	n += sizeFr
+	if !privKey.PublicKey.A.IsOnCurve() {
+		return n, errNotOnCurve
+	}
+	subtle.ConstantTimeCopy(1, privKey.scalar[:], buf[sizeFr:2*sizeFr])
+	n += sizeFr
+	subtle.ConstantTimeCopy(1, privKey.randSrc[:], buf[2*sizeFr:])
+	n += sizeFr
+	return n, nil
+}
+
+// Bytes returns the binary representation of sig
+// as a byte array of size 3*sizeFr x||y||s where
+// * x, y are the coordinates of a point on the twisted
+// Edwards represented in big endian
+// * s=r+h(r,a,m) mod l, the Hasse bound guarantees that
+// s is smaller than sizeFr (in particular it is supposed
+// s is NOT blinded)
+func (sig *Signature) Bytes() []byte {
+	var res [sizeSignature]byte
+	sigRBin := sig.R.Bytes()
+	subtle.ConstantTimeCopy(1, res[:sizeFr], sigRBin[:])
+	subtle.ConstantTimeCopy(1, res[sizeFr:], sig.S[:])
+	return res[:]
+}
+
+// SetBytes sets sig from a buffer in binary.
+// buf is read interpreted as x||y||s where
+// * x,y are the coordinates of a point on the twisted
+// Edwards represented in big endian
+// * s=r+h(r,a,m) mod l, the Hasse bound guarantees that
+// s is smaller than sizeFr (in particular it is supposed
+// s is NOT blinded)
+// It returns the number of bytes read from buf.
+func (sig *Signature) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizeSignature { + return n, io.ErrShortBuffer + } + if _, err := sig.R.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !sig.R.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, sig.S[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + return n, nil +} diff --git a/ecc/bw6-756/twistededwards/point.go b/ecc/bw6-756/twistededwards/point.go new file mode 100644 index 000000000..d6457b1a1 --- /dev/null +++ b/ecc/bw6-756/twistededwards/point.go @@ -0,0 +1,411 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "crypto/subtle" + "io" + "math/big" + "math/bits" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// PointAffine point on a twisted Edwards curve +type PointAffine struct { + X, Y fr.Element +} + +// PointProj point in projective coordinates +type PointProj struct { + X, Y, Z fr.Element +} + +const ( + //following https://tools.ietf.org/html/rfc8032#section-3.1, + // an fr element x is negative if its binary encoding is + // lexicographically larger than -x. 
+ mCompressedNegative = 0x80 + mCompressedPositive = 0x00 + mUnmask = 0x7f + + // size in byte of a compressed point (point.Y --> fr.Element) + sizePointCompressed = fr.Limbs * 8 +) + +// Bytes returns the compressed point as a byte array +// Follows https://tools.ietf.org/html/rfc8032#section-3.1, +// as the twisted Edwards implementation is primarily used +// for eddsa. +func (p *PointAffine) Bytes() [sizePointCompressed]byte { + + var res [sizePointCompressed]byte + var mask uint + + y := p.Y.Bytes() + + if p.X.LexicographicallyLargest() { + mask = mCompressedNegative + } else { + mask = mCompressedPositive + } + // p.Y must be in little endian + y[0] |= byte(mask) // msb of y + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + y[i], y[j] = y[j], y[i] + } + subtle.ConstantTimeCopy(1, res[:], y[:]) + return res +} + +// Marshal converts p to a byte slice +func (p *PointAffine) Marshal() []byte { + b := p.Bytes() + return b[:] +} + +func computeX(y *fr.Element) (x fr.Element) { + var one, num, den fr.Element + one.SetOne() + num.Square(y) + den.Mul(&num, &edwards.D) + num.Sub(&one, &num) + den.Sub(&edwards.A, &den) + x.Div(&num, &den) + x.Sqrt(&x) + return +} + +// SetBytes sets p from buf +// len(buf) >= sizePointCompressed +// buf contains the Y coordinate masked with a parity bit to recompute the X coordinate +// from the curve equation. See Bytes() and https://tools.ietf.org/html/rfc8032#section-3.1 +// Returns the number of read bytes and an error if the buffer is too short. 
+func (p *PointAffine) SetBytes(buf []byte) (int, error) { + + if len(buf) < sizePointCompressed { + return 0, io.ErrShortBuffer + } + bufCopy := make([]byte, sizePointCompressed) + subtle.ConstantTimeCopy(1, bufCopy, buf[:sizePointCompressed]) + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + bufCopy[i], bufCopy[j] = bufCopy[j], bufCopy[i] + } + isLexicographicallyLargest := (mCompressedNegative&bufCopy[0])>>7 == 1 + bufCopy[0] &= mUnmask + p.Y.SetBytes(bufCopy) + p.X = computeX(&p.Y) + if isLexicographicallyLargest { + if !p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } else { + if p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } + + return sizePointCompressed, nil +} + +// Unmarshal alias to SetBytes() +func (p *PointAffine) Unmarshal(b []byte) error { + _, err := p.SetBytes(b) + return err +} + +// Set sets p to p1 and return it +func (p *PointProj) Set(p1 *PointProj) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.Set(&p1.Z) + return p +} + +// Set sets p to p1 and return it +func (p *PointAffine) Set(p1 *PointAffine) *PointAffine { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + return p +} + +// Equal returns true if p=p1 false otherwise +func (p *PointAffine) Equal(p1 *PointAffine) bool { + return p.X.Equal(&p1.X) && p.Y.Equal(&p1.Y) +} + +// Equal returns true if p=p1 false otherwise +// If one point is on the affine chart Z=0 it returns false +func (p *PointProj) Equal(p1 *PointProj) bool { + if p.Z.IsZero() || p1.Z.IsZero() { + return false + } + var pAffine, p1Affine PointAffine + pAffine.FromProj(p) + p1Affine.FromProj(p1) + return pAffine.Equal(&p1Affine) +} + +// NewPointAffine creates a new instance of PointAffine +func NewPointAffine(x, y fr.Element) PointAffine { + return PointAffine{x, y} +} + +// IsOnCurve checks if a point is on the twisted Edwards curve +func (p *PointAffine) IsOnCurve() bool { + + ecurve := GetEdwardsCurve() + + var lhs, rhs, tmp fr.Element + + tmp.Mul(&p.Y, &p.Y) + lhs.Mul(&p.X, &p.X) + 
mulByA(&lhs) + lhs.Add(&lhs, &tmp) + + tmp.Mul(&p.X, &p.X). + Mul(&tmp, &p.Y). + Mul(&tmp, &p.Y). + Mul(&tmp, &ecurve.D) + rhs.SetOne().Add(&rhs, &tmp) + + return lhs.Equal(&rhs) +} + +// Add adds two points (x,y), (u,v) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Add(p1, p2 *PointAffine) *PointAffine { + + ecurve := GetEdwardsCurve() + + var xu, yv, xv, yu, dxyuv, one, denx, deny fr.Element + pRes := new(PointAffine) + xv.Mul(&p1.X, &p2.Y) + yu.Mul(&p1.Y, &p2.X) + pRes.X.Add(&xv, &yu) + + xu.Mul(&p1.X, &p2.X) + mulByA(&xu) + yv.Mul(&p1.Y, &p2.Y) + pRes.Y.Sub(&yv, &xu) + + dxyuv.Mul(&xv, &yu).Mul(&dxyuv, &ecurve.D) + one.SetOne() + denx.Add(&one, &dxyuv) + deny.Sub(&one, &dxyuv) + + p.X.Div(&pRes.X, &denx) + p.Y.Div(&pRes.Y, &deny) + + return p +} + +// Double doubles point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Double(p1 *PointAffine) *PointAffine { + + p.Set(p1) + var xx, yy, xy, denum, two fr.Element + + xx.Square(&p.X) + yy.Square(&p.Y) + xy.Mul(&p.X, &p.Y) + mulByA(&xx) + denum.Add(&xx, &yy) + + p.X.Double(&xy).Div(&p.X, &denum) + + two.SetOne().Double(&two) + denum.Neg(&denum).Add(&denum, &two) + + p.Y.Sub(&yy, &xx).Div(&p.Y, &denum) + + return p +} + +// Neg negates point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointProj) Neg(p1 *PointProj) *PointProj { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// FromProj sets p in affine from p in projective +func (p *PointAffine) FromProj(p1 *PointProj) *PointAffine { + p.X.Div(&p1.X, &p1.Z) + p.Y.Div(&p1.Y, &p1.Z) + return p +} + +// FromAffine sets p in projective from p in affine +func (p *PointProj) FromAffine(p1 *PointAffine) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.SetOne() + return p +} + +// Add adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-add-2008-bbjlp +func (p *PointProj) Add(p1, p2 *PointProj) 
*PointProj {
+
+	var res PointProj
+
+	ecurve := GetEdwardsCurve()
+
+	var A, B, C, D, E, F, G, H, I fr.Element
+	A.Mul(&p1.Z, &p2.Z)
+	B.Square(&A)
+	C.Mul(&p1.X, &p2.X)
+	D.Mul(&p1.Y, &p2.Y)
+	E.Mul(&ecurve.D, &C).Mul(&E, &D)
+	F.Sub(&B, &E)
+	G.Add(&B, &E)
+	H.Add(&p1.X, &p1.Y)
+	I.Add(&p2.X, &p2.Y)
+	res.X.Mul(&H, &I).
+		Sub(&res.X, &C).
+		Sub(&res.X, &D).
+		Mul(&res.X, &A).
+		Mul(&res.X, &F)
+	mulByA(&C)
+	C.Neg(&C)
+	res.Y.Add(&D, &C).
+		Mul(&res.Y, &A).
+		Mul(&res.Y, &G)
+	res.Z.Mul(&F, &G)
+
+	p.Set(&res)
+	return p
+}
+
+// MixedAdd adds a point in projective to a point in affine coordinates
+// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-madd-2008-bbjlp
+func (p *PointProj) MixedAdd(p1 *PointProj, p2 *PointAffine) *PointProj {
+
+	var res PointProj
+
+	ecurve := GetEdwardsCurve()
+
+	var B, C, D, E, F, G, H, I fr.Element
+	B.Square(&p1.Z)
+	C.Mul(&p1.X, &p2.X)
+	D.Mul(&p1.Y, &p2.Y)
+	E.Mul(&ecurve.D, &C).Mul(&E, &D)
+	F.Sub(&B, &E)
+	G.Add(&B, &E)
+	H.Add(&p1.X, &p1.Y)
+	I.Add(&p2.X, &p2.Y)
+	res.X.Mul(&H, &I).
+		Sub(&res.X, &C).
+		Sub(&res.X, &D).
+		Mul(&res.X, &p1.Z).
+		Mul(&res.X, &F)
+	mulByA(&C)
+	res.Y.Sub(&D, &C).
+		Mul(&res.Y, &p1.Z).
+		Mul(&res.Y, &G)
+	res.Z.Mul(&F, &G)
+
+	p.Set(&res)
+	return p
+}
+
+// Double doubles a point in projective coordinates
+// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#doubling-dbl-2008-bbjlp
+func (p *PointProj) Double(p1 *PointProj) *PointProj {
+
+	var res PointProj
+
+	var B, C, D, E, F, H, J fr.Element
+
+	B.Add(&p1.X, &p1.Y).Square(&B)
+	C.Square(&p1.X)
+	D.Square(&p1.Y)
+	E.Set(&C)
+	mulByA(&E)
+	F.Add(&E, &D)
+	H.Square(&p1.Z)
+	J.Sub(&F, &H).Sub(&J, &H)
+	res.X.Sub(&B, &C).
+		Sub(&res.X, &D).
+ Mul(&res.X, &J) + res.Y.Sub(&E, &D).Mul(&res.Y, &F) + res.Z.Mul(&F, &J) + + p.Set(&res) + return p +} + +// Neg sets p to -p1 and returns it +func (p *PointAffine) Neg(p1 *PointAffine) *PointAffine { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// setInfinity sets p to O (0:1:1) +func (p *PointProj) setInfinity() *PointProj { + p.X.SetZero() + p.Y.SetOne() + p.Z.SetOne() + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in projective coordinates with a scalar in big.Int +func (p *PointProj) ScalarMul(p1 *PointProj, scalar *big.Int) *PointProj { + + var _scalar big.Int + _scalar.Set(scalar) + p.Set(p1) + if _scalar.Sign() == -1 { + _scalar.Neg(&_scalar) + p.Neg(p) + } + var resProj PointProj + resProj.setInfinity() + const wordSize = bits.UintSize + sWords := _scalar.Bits() + + for i := len(sWords) - 1; i >= 0; i-- { + ithWord := sWords[i] + for k := 0; k < wordSize; k++ { + resProj.Double(&resProj) + kthBit := (ithWord >> (wordSize - 1 - k)) & 1 + if kthBit == 1 { + resProj.Add(&resProj, p) + } + } + } + + p.Set(&resProj) + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in affine coordinates with a scalar in big.Int +func (p *PointAffine) ScalarMul(p1 *PointAffine, scalar *big.Int) *PointAffine { + + var p1Proj, resProj PointProj + p1Proj.FromAffine(p1) + resProj.ScalarMul(&p1Proj, scalar) + p.FromProj(&resProj) + + return p +} diff --git a/ecc/bw6-756/twistededwards/twistededwards.go b/ecc/bw6-756/twistededwards/twistededwards.go new file mode 100644 index 000000000..152ac7c20 --- /dev/null +++ b/ecc/bw6-756/twistededwards/twistededwards.go @@ -0,0 +1,63 @@ +/* +Copyright © 2020 ConsenSys + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package twistededwards + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 +type CurveParams struct { + A, D fr.Element // in Montgomery form + Cofactor fr.Element // not in Montgomery form + Order big.Int + Base PointAffine +} + +var edwards CurveParams + +// GetEdwardsCurve returns the twisted Edwards curve on BW6-756's Fr +func GetEdwardsCurve() CurveParams { + // copy to keep Order private + var res CurveParams + + res.A.Set(&edwards.A) + res.D.Set(&edwards.D) + res.Cofactor.Set(&edwards.Cofactor) + res.Order.Set(&edwards.Order) + res.Base.Set(&edwards.Base) + + return res +} + +func init() { + + edwards.A.SetUint64(143580) + edwards.D.SetUint64(143576) + edwards.Cofactor.SetUint64(8).FromMont() + edwards.Order.SetString("75656025759413271466656060197725120092480961471365614219134998880569790930794516726065877484428941069706901665493", 10) + + edwards.Base.X.SetString("178620376715698421301710631119120785579284871526578026139185646772672252736182448135689014711987732666078420387915") + edwards.Base.Y.SetString("279345325880910540799960837653138904956852780817349960193932651092957355032339063742900216468694143617372745972501") +} + +// mulByA multiplies fr.Element by edwards.A +func mulByA(x *fr.Element) { + x.Mul(x, &edwards.A) +} diff --git a/ecc/bw6-756/twistededwards/twistededwards_test.go b/ecc/bw6-756/twistededwards/twistededwards_test.go new file mode 100644 index 000000000..b0398d2d5 --- /dev/null +++ b/ecc/bw6-756/twistededwards/twistededwards_test.go @@ -0,0 
+1,456 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "math/big" + "math/rand" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + // affine + properties.Property("Equal affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1 PointAffine + p1.Set(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(¶ms.Base) + }, + )) + + properties.Property("Add affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + p3.Set(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.Set(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := 
GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Neg(&p1) + p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + var s big.Int + s.SetUint64(10) + + p2.ScalarMul(&p1, &s) + p1.ScalarMul(&p1, &s) + + return p2.Equal(&p1) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + // proj + properties.Property("Equal projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1, baseProj PointProj + p1.FromAffine(¶ms.Base) + baseProj.FromAffine(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(&baseProj) + }, + )) + + properties.Property("Add projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + p3.FromAffine(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.FromAffine(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg projective: having the 
receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Neg(&p1) + p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestField(t *testing.T) { + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS := GenBigInt() + + properties.Property("MulByA(x) should match Mul(x, curve.A)", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var z1, z2 fr.Element + z1.SetBigInt(&s) + z2.Mul(&z1, ¶ms.A) + mulByA(&z1) + + return z1.Equal(&z2) + }, + genS, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS1 := GenBigInt() + genS2 := GenBigInt() + + // affine + properties.Property("(affine) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + p2.Neg(&p1) + + p1.Add(&p1, &p2) + + var one fr.Element + one.SetOne() + + return p1.IsOnCurve() && p1.X.IsZero() && p1.Y.Equal(&one) + }, + genS1, + )) + + properties.Property("(affine) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + p1.ScalarMul(¶ms.Base, &s) + p2.ScalarMul(¶ms.Base, &s) + + p1.Add(&p1, &p2) + p2.Double(&p2) + + return p1.IsOnCurve() && p1.Equal(&p2) && !p1.Equal(&inf) + }, + genS1, + )) + + properties.Property("(affine) [a]P+[b]P = [a+b]P", prop.ForAll( + func(s1, s2 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, p3, inf PointAffine + inf.X.SetZero() + inf.Y.SetZero() + p1.ScalarMul(¶ms.Base, &s1) + p2.ScalarMul(¶ms.Base, &s2) + p3.Set(¶ms.Base) + + 
p2.Add(&p1, &p2) + + s1.Add(&s1, &s2) + p3.ScalarMul(¶ms.Base, &s1) + + return p2.IsOnCurve() && p3.Equal(&p2) && !p3.Equal(&inf) + }, + genS1, + genS2, + )) + + properties.Property("(affine) [a]P+[-a]P = O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + inf.X.SetZero() + inf.Y.SetOne() + p1.ScalarMul(¶ms.Base, &s1) + s1.Neg(&s1) + p2.ScalarMul(¶ms.Base, &s1) + + p2.Add(&p1, &p2) + + return p2.IsOnCurve() && p2.Equal(&inf) + }, + genS1, + )) + + properties.Property("[5]P=[2][2]P+P", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + + five := big.NewInt(5) + p2.Double(&p1).Double(&p2).Add(&p2, &p1) + p1.ScalarMul(&p1, five) + + return p2.IsOnCurve() && p2.Equal(&p1) + }, + genS1, + )) + + // proj + properties.Property("(projective) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p1.ScalarMul(&baseProj, &s1) + p2.Neg(&p1) + + p.Add(&p1, &p2) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(projective) P+P=2*P", prop.ForAll( + + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p.ScalarMul(&baseProj, &s) + + p1.Add(&p, &p) + p2.Double(&p) + + return p1.Equal(&p2) + }, + genS1, + )) + + // mixed + properties.Property("(mixed) P+(-P)=O", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, pProj, p PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + pAffine.Neg(&pAffine) + + p.MixedAdd(&pProj, &pAffine) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(mixed) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, 
pProj, p, p2 PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + + p.MixedAdd(&pProj, &pAffine) + p2.Double(&pProj) + + return p.Equal(&p2) + }, + genS1, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestMarshal(t *testing.T) { + + var point, unmarshalPoint PointAffine + point.Set(&edwards.Base) + for i := 0; i < 20; i++ { + b := point.Marshal() + unmarshalPoint.Unmarshal(b) + if !point.Equal(&unmarshalPoint) { + t.Fatal("error unmarshal(marshal(point))") + } + point.Add(&point, &edwards.Base) + } +} + +// GenBigInt generates a big.Int +// TODO @thomas we use fr size as max bound here +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkScalarMul(b *testing.B) { + params := GetEdwardsCurve() + var a PointProj + var s big.Int + a.FromAffine(¶ms.Base) + s.SetString("52435875175126190479447705081859658376581184513", 10) + s.Add(&s, ¶ms.Order) + + var doubleAndAdd PointProj + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } + }) +} diff --git a/ecc/ecc.go b/ecc/ecc.go index 3615cf261..2b6d2378e 100644 --- a/ecc/ecc.go +++ b/ecc/ecc.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315, bw6-633, BLS12-378 and BW6-756 elliptic curves implementation (+pairing). 
+// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315, bw6-633, bls12-378 and bw6-756 elliptic curves implementation (+pairing). // // Also // diff --git a/hash/hashes.go b/hash/hashes.go index 01b9b86ae..494b4d1d9 100644 --- a/hash/hashes.go +++ b/hash/hashes.go @@ -26,6 +26,7 @@ import ( bls315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/mimc" bn254 "github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc" bw633 "github.com/consensys/gnark-crypto/ecc/bw6-633/fr/mimc" + bw756 "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/mimc" bw761 "github.com/consensys/gnark-crypto/ecc/bw6-761/fr/mimc" ) @@ -39,6 +40,7 @@ const ( MIMC_BW6_761 MIMC_BLS24_315 MIMC_BW6_633 + MIMC_BW6_756 ) // size of digests in bytes @@ -50,6 +52,7 @@ var digestSize = []uint8{ MIMC_BW6_761: 96, MIMC_BLS24_315: 48, MIMC_BW6_633: 80, + MIMC_BW6_756: 96, } // New creates the corresponding mimc hash function. @@ -69,6 +72,8 @@ func (m Hash) New(seed string) hash.Hash { return bls315.NewMiMC(seed) case MIMC_BW6_633: return bw633.NewMiMC(seed) + case MIMC_BW6_756: + return bw756.NewMiMC(seed) default: panic("Unknown mimc ID") } @@ -91,6 +96,8 @@ func (m Hash) String() string { return "MIMC_BLS315" case MIMC_BW6_633: return "MIMC_BW633" + case MIMC_BW6_756: + return "MIMC_BW756" default: panic("Unknown mimc ID") } diff --git a/internal/generator/crypto/signature/eddsa/generate.go b/internal/generator/crypto/signature/eddsa/generate.go index dd4f82519..1422a4137 100644 --- a/internal/generator/crypto/signature/eddsa/generate.go +++ b/internal/generator/crypto/signature/eddsa/generate.go @@ -8,10 +8,6 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - if conf.Equal(config.BW6_756) { - return nil - } - // eddsa conf.Package = "eddsa" entries := []bavard.Entry{ diff --git a/internal/generator/edwards/generate.go b/internal/generator/edwards/generate.go index b2ea5b5e1..45f6bf1bd 100644 --- 
a/internal/generator/edwards/generate.go +++ b/internal/generator/edwards/generate.go @@ -8,10 +8,6 @@ import ( ) func Generate(conf config.Curve, baseDir string, bgen *bavard.BatchGenerator) error { - if conf.Equal(config.BW6_756) { - return nil - } - conf.Package = "twistededwards" entries := []bavard.Entry{ diff --git a/signature/signature.go b/signature/signature.go index f736ea653..d71cda2e7 100644 --- a/signature/signature.go +++ b/signature/signature.go @@ -75,7 +75,7 @@ type Signer interface { type SignatureScheme uint -const maxSignatures = 7 +const maxSignatures = 8 const ( EDDSA_BN254 SignatureScheme = iota @@ -85,6 +85,7 @@ const ( EDDSA_BW6_761 EDDSA_BLS24_315 EDDSA_BW6_633 + EDDSA_BW6_756 ) var signatures = make([]func(io.Reader) (Signer, error), maxSignatures) From 900aadd1e47844576fb9c3a24c192e9b9f711bc7 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 31 Dec 2021 19:23:13 +0100 Subject: [PATCH 13/29] perf(bw6-756/tEd): smallest A coeff --- ecc/bw6-756/twistededwards/twistededwards.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecc/bw6-756/twistededwards/twistededwards.go b/ecc/bw6-756/twistededwards/twistededwards.go index 152ac7c20..c24f0261a 100644 --- a/ecc/bw6-756/twistededwards/twistededwards.go +++ b/ecc/bw6-756/twistededwards/twistededwards.go @@ -48,12 +48,12 @@ func GetEdwardsCurve() CurveParams { func init() { - edwards.A.SetUint64(143580) - edwards.D.SetUint64(143576) + edwards.A.SetUint64(35895) + edwards.D.SetUint64(35894) edwards.Cofactor.SetUint64(8).FromMont() edwards.Order.SetString("75656025759413271466656060197725120092480961471365614219134998880569790930794516726065877484428941069706901665493", 10) - edwards.Base.X.SetString("178620376715698421301710631119120785579284871526578026139185646772672252736182448135689014711987732666078420387915") + edwards.Base.X.SetString("357240753431396842603421262238241571158569743053156052278371293545344505472364896271378029423975465332156840775830") 
edwards.Base.Y.SetString("279345325880910540799960837653138904956852780817349960193932651092957355032339063742900216468694143617372745972501") } From 8009401addd79f346fbe23af0143c2e29ce05e11 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 20 Dec 2021 12:54:51 +0100 Subject: [PATCH 14/29] feat: add BLS12-378, a GT-strong SNARK-friendly inner curve --- ecc/bls12-377/bls12-377.go | 2 +- ecc/bls12-378/bls12-378.go | 126 + ecc/bls12-378/doc.go | 18 + ecc/bls12-378/fp/arith.go | 60 + ecc/bls12-378/fp/asm.go | 24 + ecc/bls12-378/fp/asm_noadx.go | 25 + ecc/bls12-378/fp/doc.go | 43 + ecc/bls12-378/fp/element.go | 1720 +++++++++++ ecc/bls12-378/fp/element_exp.go | 1040 +++++++ ecc/bls12-378/fp/element_fuzz.go | 152 + ecc/bls12-378/fp/element_mul_adx_amd64.s | 836 +++++ ecc/bls12-378/fp/element_mul_amd64.s | 858 ++++++ ecc/bls12-378/fp/element_ops_amd64.go | 50 + ecc/bls12-378/fp/element_ops_amd64.s | 452 +++ ecc/bls12-378/fp/element_ops_noasm.go | 78 + ecc/bls12-378/fp/element_test.go | 2681 +++++++++++++++++ ecc/bls12-378/fr/arith.go | 60 + ecc/bls12-378/fr/asm.go | 24 + ecc/bls12-378/fr/asm_noadx.go | 25 + ecc/bls12-378/fr/doc.go | 43 + ecc/bls12-378/fr/element.go | 1466 +++++++++ ecc/bls12-378/fr/element_exp.go | 642 ++++ ecc/bls12-378/fr/element_fuzz.go | 136 + ecc/bls12-378/fr/element_mul_adx_amd64.s | 466 +++ ecc/bls12-378/fr/element_mul_amd64.s | 488 +++ ecc/bls12-378/fr/element_ops_amd64.go | 50 + ecc/bls12-378/fr/element_ops_amd64.s | 340 +++ ecc/bls12-378/fr/element_ops_noasm.go | 78 + ecc/bls12-378/fr/element_test.go | 2649 ++++++++++++++++ ecc/bls12-378/fr/fft/doc.go | 18 + ecc/bls12-378/fr/fft/domain.go | 293 ++ ecc/bls12-378/fr/fft/domain_test.go | 47 + ecc/bls12-378/fr/fft/fft.go | 318 ++ ecc/bls12-378/fr/fft/fft_test.go | 413 +++ ecc/bls12-378/fr/fft/fuzz.go | 73 + ecc/bls12-378/fr/fft/fuzz_test.go | 56 + ecc/bls12-378/fr/kzg/doc.go | 18 + ecc/bls12-378/fr/kzg/fuzz.go | 84 + ecc/bls12-378/fr/kzg/fuzz_test.go | 56 + ecc/bls12-378/fr/kzg/kzg.go | 
518 ++++ ecc/bls12-378/fr/kzg/kzg_test.go | 453 +++ ecc/bls12-378/fr/kzg/marshal.go | 138 + ecc/bls12-378/fr/mimc/doc.go | 18 + ecc/bls12-378/fr/mimc/fuzz.go | 34 + ecc/bls12-378/fr/mimc/mimc.go | 174 ++ ecc/bls12-378/fr/permutation/doc.go | 18 + ecc/bls12-378/fr/permutation/permutation.go | 361 +++ .../fr/permutation/permutation_test.go | 94 + ecc/bls12-378/fr/plookup/doc.go | 18 + ecc/bls12-378/fr/plookup/plookup_test.go | 139 + ecc/bls12-378/fr/plookup/table.go | 252 ++ ecc/bls12-378/fr/plookup/vector.go | 687 +++++ ecc/bls12-378/fr/polynomial/doc.go | 18 + ecc/bls12-378/fr/polynomial/polynomial.go | 123 + .../fr/polynomial/polynomial_test.go | 208 ++ ecc/bls12-378/fuzz.go | 76 + ecc/bls12-378/fuzz_test.go | 56 + ecc/bls12-378/g1.go | 964 ++++++ ecc/bls12-378/g1_test.go | 666 ++++ ecc/bls12-378/g2.go | 978 ++++++ ecc/bls12-378/g2_test.go | 685 +++++ ecc/bls12-378/hash_to_curve.go | 276 ++ ecc/bls12-378/internal/fptower/asm.go | 28 + ecc/bls12-378/internal/fptower/asm_noadx.go | 25 + ecc/bls12-378/internal/fptower/e12.go | 561 ++++ ecc/bls12-378/internal/fptower/e12_pairing.go | 128 + ecc/bls12-378/internal/fptower/e12_test.go | 492 +++ ecc/bls12-378/internal/fptower/e2.go | 262 ++ ecc/bls12-378/internal/fptower/e2_amd64.go | 45 + ecc/bls12-378/internal/fptower/e2_amd64.s | 320 ++ ecc/bls12-378/internal/fptower/e2_bls378.go | 104 + ecc/bls12-378/internal/fptower/e2_fallback.go | 40 + ecc/bls12-378/internal/fptower/e2_test.go | 506 ++++ ecc/bls12-378/internal/fptower/e6.go | 264 ++ ecc/bls12-378/internal/fptower/e6_test.go | 317 ++ ecc/bls12-378/internal/fptower/frobenius.go | 305 ++ .../internal/fptower/generators_test.go | 51 + ecc/bls12-378/marshal.go | 1160 +++++++ ecc/bls12-378/marshal_test.go | 467 +++ ecc/bls12-378/multiexp.go | 2303 ++++++++++++++ ecc/bls12-378/multiexp_test.go | 1349 +++++++++ ecc/bls12-378/pairing.go | 241 ++ ecc/bls12-378/pairing_test.go | 305 ++ ecc/bls12-378/twistededwards/doc.go | 18 + ecc/bls12-378/twistededwards/eddsa/doc.go | 22 + 
ecc/bls12-378/twistededwards/eddsa/eddsa.go | 265 ++ .../twistededwards/eddsa/eddsa_test.go | 208 ++ ecc/bls12-378/twistededwards/eddsa/marshal.go | 133 + ecc/bls12-378/twistededwards/point.go | 411 +++ .../twistededwards/twistededwards_test.go | 456 +++ ecc/ecc.go | 9 +- ...da71edd87ff573bf9ed04a00009948a20000000000 | Bin 0 -> 2295 bytes ...b068524ebfe74bfbb5411600004ca4510000000000 | Bin 0 -> 3532 bytes ...65d630ef0ff69c7b761ffd5cefe7b4128000265228 | Bin 0 -> 1907 bytes ...08c9a60370d83429275ff3a5fddaa08b0000265228 | Bin 0 -> 3158 bytes internal/generator/config/bls12-378.go | 29 + internal/generator/ecc/template/point.go.tmpl | 7 +- 97 files changed, 33786 insertions(+), 7 deletions(-) create mode 100644 ecc/bls12-378/bls12-378.go create mode 100644 ecc/bls12-378/doc.go create mode 100644 ecc/bls12-378/fp/arith.go create mode 100644 ecc/bls12-378/fp/asm.go create mode 100644 ecc/bls12-378/fp/asm_noadx.go create mode 100644 ecc/bls12-378/fp/doc.go create mode 100644 ecc/bls12-378/fp/element.go create mode 100644 ecc/bls12-378/fp/element_exp.go create mode 100644 ecc/bls12-378/fp/element_fuzz.go create mode 100644 ecc/bls12-378/fp/element_mul_adx_amd64.s create mode 100644 ecc/bls12-378/fp/element_mul_amd64.s create mode 100644 ecc/bls12-378/fp/element_ops_amd64.go create mode 100644 ecc/bls12-378/fp/element_ops_amd64.s create mode 100644 ecc/bls12-378/fp/element_ops_noasm.go create mode 100644 ecc/bls12-378/fp/element_test.go create mode 100644 ecc/bls12-378/fr/arith.go create mode 100644 ecc/bls12-378/fr/asm.go create mode 100644 ecc/bls12-378/fr/asm_noadx.go create mode 100644 ecc/bls12-378/fr/doc.go create mode 100644 ecc/bls12-378/fr/element.go create mode 100644 ecc/bls12-378/fr/element_exp.go create mode 100644 ecc/bls12-378/fr/element_fuzz.go create mode 100644 ecc/bls12-378/fr/element_mul_adx_amd64.s create mode 100644 ecc/bls12-378/fr/element_mul_amd64.s create mode 100644 ecc/bls12-378/fr/element_ops_amd64.go create mode 100644 
ecc/bls12-378/fr/element_ops_amd64.s create mode 100644 ecc/bls12-378/fr/element_ops_noasm.go create mode 100644 ecc/bls12-378/fr/element_test.go create mode 100644 ecc/bls12-378/fr/fft/doc.go create mode 100644 ecc/bls12-378/fr/fft/domain.go create mode 100644 ecc/bls12-378/fr/fft/domain_test.go create mode 100644 ecc/bls12-378/fr/fft/fft.go create mode 100644 ecc/bls12-378/fr/fft/fft_test.go create mode 100644 ecc/bls12-378/fr/fft/fuzz.go create mode 100644 ecc/bls12-378/fr/fft/fuzz_test.go create mode 100644 ecc/bls12-378/fr/kzg/doc.go create mode 100644 ecc/bls12-378/fr/kzg/fuzz.go create mode 100644 ecc/bls12-378/fr/kzg/fuzz_test.go create mode 100644 ecc/bls12-378/fr/kzg/kzg.go create mode 100644 ecc/bls12-378/fr/kzg/kzg_test.go create mode 100644 ecc/bls12-378/fr/kzg/marshal.go create mode 100644 ecc/bls12-378/fr/mimc/doc.go create mode 100644 ecc/bls12-378/fr/mimc/fuzz.go create mode 100644 ecc/bls12-378/fr/mimc/mimc.go create mode 100644 ecc/bls12-378/fr/permutation/doc.go create mode 100644 ecc/bls12-378/fr/permutation/permutation.go create mode 100644 ecc/bls12-378/fr/permutation/permutation_test.go create mode 100644 ecc/bls12-378/fr/plookup/doc.go create mode 100644 ecc/bls12-378/fr/plookup/plookup_test.go create mode 100644 ecc/bls12-378/fr/plookup/table.go create mode 100644 ecc/bls12-378/fr/plookup/vector.go create mode 100644 ecc/bls12-378/fr/polynomial/doc.go create mode 100644 ecc/bls12-378/fr/polynomial/polynomial.go create mode 100644 ecc/bls12-378/fr/polynomial/polynomial_test.go create mode 100644 ecc/bls12-378/fuzz.go create mode 100644 ecc/bls12-378/fuzz_test.go create mode 100644 ecc/bls12-378/g1.go create mode 100644 ecc/bls12-378/g1_test.go create mode 100644 ecc/bls12-378/g2.go create mode 100644 ecc/bls12-378/g2_test.go create mode 100644 ecc/bls12-378/hash_to_curve.go create mode 100644 ecc/bls12-378/internal/fptower/asm.go create mode 100644 ecc/bls12-378/internal/fptower/asm_noadx.go create mode 100644 
ecc/bls12-378/internal/fptower/e12.go create mode 100644 ecc/bls12-378/internal/fptower/e12_pairing.go create mode 100644 ecc/bls12-378/internal/fptower/e12_test.go create mode 100644 ecc/bls12-378/internal/fptower/e2.go create mode 100644 ecc/bls12-378/internal/fptower/e2_amd64.go create mode 100644 ecc/bls12-378/internal/fptower/e2_amd64.s create mode 100644 ecc/bls12-378/internal/fptower/e2_bls378.go create mode 100644 ecc/bls12-378/internal/fptower/e2_fallback.go create mode 100644 ecc/bls12-378/internal/fptower/e2_test.go create mode 100644 ecc/bls12-378/internal/fptower/e6.go create mode 100644 ecc/bls12-378/internal/fptower/e6_test.go create mode 100644 ecc/bls12-378/internal/fptower/frobenius.go create mode 100644 ecc/bls12-378/internal/fptower/generators_test.go create mode 100644 ecc/bls12-378/marshal.go create mode 100644 ecc/bls12-378/marshal_test.go create mode 100644 ecc/bls12-378/multiexp.go create mode 100644 ecc/bls12-378/multiexp_test.go create mode 100644 ecc/bls12-378/pairing.go create mode 100644 ecc/bls12-378/pairing_test.go create mode 100644 ecc/bls12-378/twistededwards/doc.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/doc.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/eddsa.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/eddsa_test.go create mode 100644 ecc/bls12-378/twistededwards/eddsa/marshal.go create mode 100644 ecc/bls12-378/twistededwards/point.go create mode 100644 ecc/bls12-378/twistededwards/twistededwards_test.go create mode 100644 internal/generator/addchain/1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 create mode 100644 internal/generator/addchain/1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000 create mode 100644 internal/generator/addchain/41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 create mode 100644 internal/generator/addchain/fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 create 
mode 100644 internal/generator/config/bls12-378.go diff --git a/ecc/bls12-377/bls12-377.go b/ecc/bls12-377/bls12-377.go index e6dce675a..c41f651b7 100644 --- a/ecc/bls12-377/bls12-377.go +++ b/ecc/bls12-377/bls12-377.go @@ -111,7 +111,7 @@ func init() { endo.u.A0.SetString("80949648264912719408558363140637477264845294720710499478137287262712535938301461879813459410946") endo.v.A0.SetString("216465761340224619389371505802605247630151569547285782856803747159100223055385581585702401816380679166954762214499") - // binary decomposition of 15132376222941642752 little endian + // binary decomposition of 9586122913090633729 little endian loopCounter = [64]int8{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1} xGen.SetString("9586122913090633729", 10) diff --git a/ecc/bls12-378/bls12-378.go b/ecc/bls12-378/bls12-378.go new file mode 100644 index 000000000..842c373c6 --- /dev/null +++ b/ecc/bls12-378/bls12-378.go @@ -0,0 +1,126 @@ +package bls12378 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +// E: y**2=x**3+1 +// Etwist: y**2 = x**3+u +// Tower: Fp->Fp2, u**2=-5 -> Fp12, v**6=u +// Generator (BLS12 family): x=11045256207009841153 +// optimal Ate loop: trace(frob)-1=x +// trace of pi: x+1 +// Fp: p=605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +// Fr: r=14883435066912132899950318861128167269793560281114003360875131245101026639873 + +// ID bls378 ID +const ID = ecc.BLS12_378 + +// bCurveCoeff b coeff of the curve +var bCurveCoeff fp.Element + +// bTwistCurveCoeff b coeff of the twist (defined over Fp2) curve +var bTwistCurveCoeff fptower.E2 + +// generators of the 
r-torsion group, resp. in ker(pi-id), ker(Tr) +var g1Gen G1Jac +var g2Gen G2Jac + +var g1GenAff G1Affine +var g2GenAff G2Affine + +// point at infinity +var g1Infinity G1Jac +var g2Infinity G2Jac + +// optimal Ate loop counter (=trace-1 = x in BLS family) +var loopCounter [64]int8 + +// Parameters useful for the GLV scalar multiplication. The third roots define the +// endomorphisms phi1 and phi2 for and . lambda is such that lies above +// in the ring Z[phi]. More concretely it's the associated eigenvalue +// of phi1 (resp phi2) restricted to (resp ) +// cf https://www.cosic.esat.kuleuven.be/nessie/reports/phase2/GLV.pdf +var thirdRootOneG1 fp.Element +var thirdRootOneG2 fp.Element +var lambdaGLV big.Int + +// glvBasis stores R-linearly independant vectors (a,b), (c,d) +// in ker((u,v)->u+vlambda[r]), and their determinant +var glvBasis ecc.Lattice + +// psi o pi o psi**-1, where psi:E->E' is the degree 6 iso defined over Fp12 +var endo struct { + u fptower.E2 + v fptower.E2 +} + +// generator of the curve +var xGen big.Int + +// expose the tower -- github.com/consensys/gnark uses it in a gnark circuit + +// E2 is a degree two finite field extension of fp.Element +type E2 = fptower.E2 + +// E6 is a degree three finite field extension of fp2 +type E6 = fptower.E6 + +// E12 is a degree two finite field extension of fp6 +type E12 = fptower.E12 + +func init() { + + bCurveCoeff.SetUint64(1) + bTwistCurveCoeff.A1.SetUint64(1) // M-twist + + // E(3,y) * cofactor + g1Gen.X.SetString("302027100877540500544138164010696035562809807233645104772290911818386302983750063098216015456036850656714568735197") + g1Gen.Y.SetString("232851047397483214541821965369374725182070455016459237170823497053622811786333462699984177726412751508198874482530") + g1Gen.Z.SetString("1") + + // E'(1,y) * cofactor' + g2Gen.X.SetString("470810816643554779222760025249941413452299198622737082648784137654933833261310635469274149014014206108405592809732", + 
"317092959336227428400228502739777439718827088477410533227996105067347670094088101088421556743730925535231685964487") + g2Gen.Y.SetString("248853758964950314624408411876149087897475217517523838449839260719963153199419627931373025216041741725848318074460", + "389162134924826972299508957175841717907876177152103852864177212390074067430801162403069988146334006672491106545644") + g2Gen.Z.SetString("1", + "0") + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + g1Infinity.X.SetOne() + g1Infinity.Y.SetOne() + g2Infinity.X.SetOne() + g2Infinity.Y.SetOne() + + thirdRootOneG1.SetString("164391353554439166353793911729193406645071739502673898176639736370075683438438023898983435337729") + thirdRootOneG2.Square(&thirdRootOneG1) + lambdaGLV.SetString("121997684678489422961514670190292369408", 10) //(x**2-1) + _r := fr.Modulus() + ecc.PrecomputeLattice(_r, &lambdaGLV, &glvBasis) + + endo.u.A0.SetString("164391353554439166353793911729193406645071739502673898176639736370075683438438023898983435337730") + endo.v.A0.SetString("595603361117066405543541008735167904222384847192046901135681663787023479658010166685728902742824780272831835669219") + + // binary decomposition of 11045256207009841153 little endian + loopCounter = [64]int8{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1} + + xGen.SetString("11045256207009841153", 10) + +} + +// Generators return the generators of the r-torsion group, resp. in ker(pi-id), ker(Tr) +func Generators() (g1Jac G1Jac, g2Jac G2Jac, g1Aff G1Affine, g2Aff G2Affine) { + g1Aff = g1GenAff + g2Aff = g2GenAff + g1Jac = g1Gen + g2Jac = g2Gen + return +} diff --git a/ecc/bls12-378/doc.go b/ecc/bls12-378/doc.go new file mode 100644 index 000000000..cd73fedbd --- /dev/null +++ b/ecc/bls12-378/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package bls12378 efficient elliptic curve and pairing implementation for bls12-378. +package bls12378 diff --git a/ecc/bls12-378/fp/arith.go b/ecc/bls12-378/fp/arith.go new file mode 100644 index 000000000..66fa66748 --- /dev/null +++ b/ecc/bls12-378/fp/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "math/bits" +) + +// madd0 hi = a*b + c (discards lo bits) +func madd0(a, b, c uint64) (hi uint64) { + var carry, lo uint64 + hi, lo = bits.Mul64(a, b) + _, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd1 hi, lo = a*b + c +func madd1(a, b, c uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd2 hi, lo = a*b + c + d +func madd2(a, b, c, d uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, e, carry) + return +} diff --git a/ecc/bls12-378/fp/asm.go b/ecc/bls12-378/fp/asm.go new file mode 100644 index 000000000..7344271eb --- /dev/null +++ b/ecc/bls12-378/fp/asm.go @@ -0,0 +1,24 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bls12-378/fp/asm_noadx.go b/ecc/bls12-378/fp/asm_noadx.go new file mode 100644 index 000000000..ae778bd3a --- /dev/null +++ b/ecc/bls12-378/fp/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bls12-378/fp/doc.go b/ecc/bls12-378/fp/doc.go new file mode 100644 index 000000000..dd844b5dc --- /dev/null +++ b/ecc/bls12-378/fp/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fp contains field arithmetic operations for modulus = 0x3eeb04...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. +// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [6]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f76a822c00009948a20000000001 // base 16 +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 // base 10 +package fp diff --git a/ecc/bls12-378/fp/element.go b/ecc/bls12-378/fp/element.go new file mode 100644 index 000000000..69e071293 --- /dev/null +++ b/ecc/bls12-378/fp/element.go @@ -0,0 +1,1720 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +import ( + "crypto/rand" + "encoding/binary" + "errors" + "io" + "math/big" + "math/bits" + "reflect" + "strconv" + "strings" + "sync" +) + +// Element represents a field element stored on 6 words (uint64) +// Element are assumed to be in Montgomery form in all methods +// field modulus q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +type Element [6]uint64 + +// Limbs number of 64 bits words needed to represent Element +const Limbs = 6 + +// Bits number bits needed to represent Element +const Bits = 378 + +// Bytes number bytes needed to represent Element +const Bytes = Limbs * 8 + +// field modulus stored as big.Int +var _modulus big.Int + +// Modulus returns q as a big.Int +// q = +// +// 605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417 +func Modulus() *big.Int { + return new(big.Int).Set(&_modulus) +} + +// q (modulus) +const qElementWord0 uint64 = 11045256207009841153 +const qElementWord1 uint64 = 14886639130118979584 +const qElementWord2 uint64 = 10956628289047010687 +const qElementWord3 uint64 = 9513184293603517222 +const qElementWord4 uint64 = 6038022134869067682 +const qElementWord5 uint64 = 283357621510263184 + +var qElement = Element{ + qElementWord0, + qElementWord1, + qElementWord2, + qElementWord3, + qElementWord4, + qElementWord5, +} + +// Used for Montgomery reduction. 
(qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r +const qInvNegLsw uint64 = 11045256207009841151 + +// rSquare +var rSquare = Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, +} + +var bigIntPool = sync.Pool{ + New: func() interface{} { + return new(big.Int) + }, +} + +func init() { + _modulus.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", 10) +} + +// NewElement returns a new Element from a uint64 value +// +// it is equivalent to +// var v NewElement +// v.SetUint64(...) +func NewElement(v uint64) Element { + z := Element{v} + z.Mul(&z, &rSquare) + return z +} + +// SetUint64 sets z to v and returns z +func (z *Element) SetUint64(v uint64) *Element { + // sets z LSB to v (non-Montgomery form) and convert z to Montgomery form + *z = Element{v} + return z.Mul(z, &rSquare) // z.ToMont() +} + +// SetInt64 sets z to v and returns z +func (z *Element) SetInt64(v int64) *Element { + + // absolute value of v + m := v >> 63 + z.SetUint64(uint64((v ^ m) - m)) + + if m != 0 { + // v is negative + z.Neg(z) + } + + return z +} + +// Set z = x +func (z *Element) Set(x *Element) *Element { + z[0] = x[0] + z[1] = x[1] + z[2] = x[2] + z[3] = x[3] + z[4] = x[4] + z[5] = x[5] + return z +} + +// SetInterface converts provided interface into Element +// returns an error if provided type is not supported +// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer), +// *big.Int, big.Int, []byte +func (z *Element) SetInterface(i1 interface{}) (*Element, error) { + switch c1 := i1.(type) { + case Element: + return z.Set(&c1), nil + case *Element: + return z.Set(c1), nil + case uint8: + return z.SetUint64(uint64(c1)), nil + case uint16: + return z.SetUint64(uint64(c1)), nil + case uint32: + return z.SetUint64(uint64(c1)), nil + case uint: + return z.SetUint64(uint64(c1)), nil + 
case uint64: + return z.SetUint64(c1), nil + case int8: + return z.SetInt64(int64(c1)), nil + case int16: + return z.SetInt64(int64(c1)), nil + case int32: + return z.SetInt64(int64(c1)), nil + case int64: + return z.SetInt64(c1), nil + case int: + return z.SetInt64(int64(c1)), nil + case string: + return z.SetString(c1), nil + case *big.Int: + return z.SetBigInt(c1), nil + case big.Int: + return z.SetBigInt(&c1), nil + case []byte: + return z.SetBytes(c1), nil + default: + return nil, errors.New("can't set fp.Element from type " + reflect.TypeOf(i1).String()) + } +} + +// SetZero z = 0 +func (z *Element) SetZero() *Element { + z[0] = 0 + z[1] = 0 + z[2] = 0 + z[3] = 0 + z[4] = 0 + z[5] = 0 + return z +} + +// SetOne z = 1 (in Montgomery form) +func (z *Element) SetOne() *Element { + z[0] = 1481365419032838079 + z[1] = 10045892448872562649 + z[2] = 7242180086616818316 + z[3] = 8832319421896135475 + z[4] = 13356930855120736188 + z[5] = 28498675542444634 + return z +} + +// Div z = x*y^-1 mod q +func (z *Element) Div(x, y *Element) *Element { + var yInv Element + yInv.Inverse(y) + z.Mul(x, &yInv) + return z +} + +// Bit returns the i'th bit, with lsb == bit 0. +// It is the responsability of the caller to convert from Montgomery to Regular form if needed +func (z *Element) Bit(i uint64) uint64 { + j := i / 64 + if j >= 6 { + return 0 + } + return uint64(z[j] >> (i % 64) & 1) +} + +// Equal returns z == x +func (z *Element) Equal(x *Element) bool { + return (z[5] == x[5]) && (z[4] == x[4]) && (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0]) +} + +// IsZero returns z == 0 +func (z *Element) IsZero() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 +} + +// IsUint64 reports whether z can be represented as an uint64. 
+func (z *Element) IsUint64() bool { + return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *Element) Cmp(x *Element) int { + _z := *z + _x := *x + _z.FromMont() + _x.FromMont() + if _z[5] > _x[5] { + return 1 + } else if _z[5] < _x[5] { + return -1 + } + if _z[4] > _x[4] { + return 1 + } else if _z[4] < _x[4] { + return -1 + } + if _z[3] > _x[3] { + return 1 + } else if _z[3] < _x[3] { + return -1 + } + if _z[2] > _x[2] { + return 1 + } else if _z[2] < _x[2] { + return -1 + } + if _z[1] > _x[1] { + return 1 + } else if _z[1] < _x[1] { + return -1 + } + if _z[0] > _x[0] { + return 1 + } else if _z[0] < _x[0] { + return -1 + } + return 0 +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *Element) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + // we check if the element is larger than (q-1) / 2 + // if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2 + + _z := *z + _z.FromMont() + + var b uint64 + _, b = bits.Sub64(_z[0], 5522628103504920577, 0) + _, b = bits.Sub64(_z[1], 16666691601914265600, b) + _, b = bits.Sub64(_z[2], 5478314144523505343, b) + _, b = bits.Sub64(_z[3], 4756592146801758611, b) + _, b = bits.Sub64(_z[4], 3019011067434533841, b) + _, b = bits.Sub64(_z[5], 141678810755131592, b) + + return b == 0 +} + +// SetRandom sets z to a random element < q +func (z *Element) SetRandom() (*Element, error) { + var bytes [48]byte + if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil { + return nil, err + } + z[0] = binary.BigEndian.Uint64(bytes[0:8]) + z[1] = binary.BigEndian.Uint64(bytes[8:16]) + z[2] = binary.BigEndian.Uint64(bytes[16:24]) + z[3] = binary.BigEndian.Uint64(bytes[24:32]) + z[4] = binary.BigEndian.Uint64(bytes[32:40]) + z[5] = binary.BigEndian.Uint64(bytes[40:48]) + z[5] %= 
283357621510263184 + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } + + return z, nil +} + +// One returns 1 (in montgommery form) +func One() Element { + var one Element + one.SetOne() + return one +} + +// Halve sets z to z / 2 (mod p) +func (z *Element) Halve() { + if z[0]&1 == 1 { + var carry uint64 + + // z = z + q + z[0], carry = bits.Add64(z[0], 11045256207009841153, 0) + z[1], carry = bits.Add64(z[1], 14886639130118979584, carry) + z[2], carry = bits.Add64(z[2], 10956628289047010687, carry) + z[3], carry = bits.Add64(z[3], 9513184293603517222, carry) + z[4], carry = bits.Add64(z[4], 6038022134869067682, carry) + z[5], _ = bits.Add64(z[5], 283357621510263184, carry) + + } + + // z = z >> 1 + + z[0] = z[0]>>1 | z[1]<<63 + z[1] = z[1]>>1 | z[2]<<63 + z[2] = z[2]>>1 | z[3]<<63 + z[3] = z[3]>>1 | z[4]<<63 + z[4] = z[4]>>1 | z[5]<<63 + z[5] >>= 1 + +} + +// API with assembly impl + +// Mul z = x * y mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Mul(x, y *Element) *Element { + mul(z, x, y) + return z +} + +// Square z = x * x mod q +// see https://hackmd.io/@zkteam/modular_multiplication +func (z *Element) Square(x *Element) *Element { + mul(z, x, x) + return z +} + +// FromMont converts z in place (i.e. 
mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func (z *Element) FromMont() *Element { + fromMont(z) + return z +} + +// Add z = x + y mod q +func (z *Element) Add(x, y *Element) *Element { + add(z, x, y) + return z +} + +// Double z = x + x mod q, aka Lsh 1 +func (z *Element) Double(x *Element) *Element { + double(z, x) + return z +} + +// Sub z = x - y mod q +func (z *Element) Sub(x, y *Element) *Element { + sub(z, x, y) + return z +} + +// Neg z = q - x +func (z *Element) Neg(x *Element) *Element { + neg(z, x) + return z +} + +// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms + +func _mulGeneric(z, x, y *Element) { + + var t [6]uint64 + var c [3]uint64 + { + // round 0 + v := x[0] + c[1], c[0] = bits.Mul64(v, y[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd1(v, y[1], c[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd1(v, y[2], c[1]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd1(v, y[3], c[1]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd1(v, y[4], c[1]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd1(v, y[5], c[1]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 1 + v := x[1] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, 
c[0], c[2], c[1]) + } + { + // round 2 + v := x[2] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 3 + v := x[3] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 4 + v := x[4] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], t[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], t[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], t[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], t[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + t[5], t[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + { + // round 5 + v 
:= x[5] + c[1], c[0] = madd1(v, y[0], t[0]) + m := c[0] * 11045256207009841151 + c[2] = madd0(m, 11045256207009841153, c[0]) + c[1], c[0] = madd2(v, y[1], c[1], t[1]) + c[2], z[0] = madd2(m, 14886639130118979584, c[2], c[0]) + c[1], c[0] = madd2(v, y[2], c[1], t[2]) + c[2], z[1] = madd2(m, 10956628289047010687, c[2], c[0]) + c[1], c[0] = madd2(v, y[3], c[1], t[3]) + c[2], z[2] = madd2(m, 9513184293603517222, c[2], c[0]) + c[1], c[0] = madd2(v, y[4], c[1], t[4]) + c[2], z[3] = madd2(m, 6038022134869067682, c[2], c[0]) + c[1], c[0] = madd2(v, y[5], c[1], t[5]) + z[5], z[4] = madd3(m, 283357621510263184, c[0], c[2], c[1]) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _mulWGeneric(z, x *Element, y uint64) { + + var t [6]uint64 + { + // round 0 + c1, c0 := bits.Mul64(y, x[0]) + m := c0 * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, c0) + c1, c0 = madd1(y, x[1], c1) + c2, t[0] = madd2(m, 14886639130118979584, c2, c0) + c1, c0 = madd1(y, x[2], c1) + c2, t[1] = madd2(m, 10956628289047010687, c2, c0) + c1, c0 = madd1(y, x[3], c1) + c2, t[2] = madd2(m, 9513184293603517222, c2, c0) + c1, c0 = madd1(y, x[4], c1) + c2, t[3] = madd2(m, 6038022134869067682, c2, c0) + c1, c0 = madd1(y, x[5], c1) + t[5], t[4] = madd3(m, 283357621510263184, c0, 
c2, c1) + } + { + // round 1 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 2 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 3 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 4 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, t[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, t[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, t[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, t[3] = madd2(m, 6038022134869067682, c2, t[4]) + t[5], t[4] = madd2(m, 283357621510263184, t[5], c2) + } + { + // round 5 + m := t[0] * 11045256207009841151 + c2 := madd0(m, 11045256207009841153, t[0]) + c2, z[0] = madd2(m, 14886639130118979584, c2, t[1]) + c2, z[1] = madd2(m, 10956628289047010687, c2, t[2]) + c2, z[2] = madd2(m, 9513184293603517222, c2, t[3]) + c2, z[3] = madd2(m, 6038022134869067682, c2, t[4]) + z[5], z[4] = madd2(m, 283357621510263184, t[5], c2) + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 
6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _fromMontGeneric(z *Element) { + // the following lines implement z = z * 1 + // with a modified CIOS montgomery multiplication + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 
11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + { + // m = z[0]n'[0] mod W + m := z[0] * 11045256207009841151 + C := madd0(m, 11045256207009841153, z[0]) + C, z[0] = madd2(m, 14886639130118979584, z[1], C) + C, z[1] = madd2(m, 10956628289047010687, z[2], C) + C, z[2] = madd2(m, 9513184293603517222, z[3], C) + C, z[3] = madd2(m, 6038022134869067682, z[4], C) + C, z[4] = madd2(m, 283357621510263184, z[5], C) + z[5] = C + } + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _addGeneric(z, x, y *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], y[0], 0) + z[1], carry = bits.Add64(x[1], y[1], carry) + z[2], carry = 
bits.Add64(x[2], y[2], carry) + z[3], carry = bits.Add64(x[3], y[3], carry) + z[4], carry = bits.Add64(x[4], y[4], carry) + z[5], _ = bits.Add64(x[5], y[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _doubleGeneric(z, x *Element) { + var carry uint64 + + z[0], carry = bits.Add64(x[0], x[0], 0) + z[1], carry = bits.Add64(x[1], x[1], carry) + z[2], carry = bits.Add64(x[2], x[2], carry) + z[3], carry = bits.Add64(x[3], x[3], carry) + z[4], carry = bits.Add64(x[4], x[4], carry) + z[5], _ = bits.Add64(x[5], x[5], carry) + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = 
bits.Sub64(z[5], 283357621510263184, b) + } +} + +func _subGeneric(z, x, y *Element) { + var b uint64 + z[0], b = bits.Sub64(x[0], y[0], 0) + z[1], b = bits.Sub64(x[1], y[1], b) + z[2], b = bits.Sub64(x[2], y[2], b) + z[3], b = bits.Sub64(x[3], y[3], b) + z[4], b = bits.Sub64(x[4], y[4], b) + z[5], b = bits.Sub64(x[5], y[5], b) + if b != 0 { + var c uint64 + z[0], c = bits.Add64(z[0], 11045256207009841153, 0) + z[1], c = bits.Add64(z[1], 14886639130118979584, c) + z[2], c = bits.Add64(z[2], 10956628289047010687, c) + z[3], c = bits.Add64(z[3], 9513184293603517222, c) + z[4], c = bits.Add64(z[4], 6038022134869067682, c) + z[5], _ = bits.Add64(z[5], 283357621510263184, c) + } +} + +func _negGeneric(z, x *Element) { + if x.IsZero() { + z.SetZero() + return + } + var borrow uint64 + z[0], borrow = bits.Sub64(11045256207009841153, x[0], 0) + z[1], borrow = bits.Sub64(14886639130118979584, x[1], borrow) + z[2], borrow = bits.Sub64(10956628289047010687, x[2], borrow) + z[3], borrow = bits.Sub64(9513184293603517222, x[3], borrow) + z[4], borrow = bits.Sub64(6038022134869067682, x[4], borrow) + z[5], _ = bits.Sub64(283357621510263184, x[5], borrow) +} + +func _reduceGeneric(z *Element) { + + // if z > q → z -= q + // note: this is NOT constant time + if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) { + var b uint64 + z[0], b = bits.Sub64(z[0], 11045256207009841153, 0) + z[1], b = bits.Sub64(z[1], 14886639130118979584, b) + z[2], b = bits.Sub64(z[2], 10956628289047010687, b) + z[3], b = bits.Sub64(z[3], 9513184293603517222, b) + z[4], b = bits.Sub64(z[4], 6038022134869067682, b) + z[5], _ = bits.Sub64(z[5], 283357621510263184, b) + } +} + +func mulByConstant(z 
*Element, c uint8) { + switch c { + case 0: + z.SetZero() + return + case 1: + return + case 2: + z.Double(z) + return + case 3: + _z := *z + z.Double(z).Add(z, &_z) + case 5: + _z := *z + z.Double(z).Double(z).Add(z, &_z) + default: + var y Element + y.SetUint64(uint64(c)) + z.Mul(z, &y) + } +} + +// BatchInvert returns a new slice with every element inverted. +// Uses Montgomery batch inversion trick +func BatchInvert(a []Element) []Element { + res := make([]Element, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + accumulator := One() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i] = accumulator + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} + +func _butterflyGeneric(a, b *Element) { + t := *a + a.Add(a, b) + b.Sub(&t, b) +} + +// BitLen returns the minimum number of bits needed to represent z +// returns 0 if z == 0 +func (z *Element) BitLen() int { + if z[5] != 0 { + return 320 + bits.Len64(z[5]) + } + if z[4] != 0 { + return 256 + bits.Len64(z[4]) + } + if z[3] != 0 { + return 192 + bits.Len64(z[3]) + } + if z[2] != 0 { + return 128 + bits.Len64(z[2]) + } + if z[1] != 0 { + return 64 + bits.Len64(z[1]) + } + return bits.Len64(z[0]) +} + +// Exp z = x^exponent mod q +func (z *Element) Exp(x Element, exponent *big.Int) *Element { + var bZero big.Int + if exponent.Cmp(&bZero) == 0 { + return z.SetOne() + } + + z.Set(&x) + + for i := exponent.BitLen() - 2; i >= 0; i-- { + z.Square(z) + if exponent.Bit(i) == 1 { + z.Mul(z, &x) + } + } + + return z +} + +// ToMont converts z to Montgomery form +// sets and returns z = z * r² +func (z *Element) ToMont() *Element { + return z.Mul(z, &rSquare) +} + +// ToRegular returns z in regular form (doesn't mutate z) +func (z Element) ToRegular() 
Element { + return *z.FromMont() +} + +// String returns the decimal representation of z as generated by +// z.Text(10). +func (z *Element) String() string { + return z.Text(10) +} + +// Text returns the string representation of z in the given base. +// Base must be between 2 and 36, inclusive. The result uses the +// lower-case letters 'a' to 'z' for digit values 10 to 35. +// No prefix (such as "0x") is added to the string. If z is a nil +// pointer it returns "". +// If base == 10 and -z fits in a uint64 prefix "-" is added to the string. +func (z *Element) Text(base int) string { + if base < 2 || base > 36 { + panic("invalid base") + } + if z == nil { + return "" + } + zz := *z + zz.FromMont() + if zz.IsUint64() { + return strconv.FormatUint(zz[0], base) + } else if base == 10 { + var zzNeg Element + zzNeg.Neg(z) + zzNeg.FromMont() + if zzNeg.IsUint64() { + return "-" + strconv.FormatUint(zzNeg[0], base) + } + } + vv := bigIntPool.Get().(*big.Int) + r := zz.ToBigInt(vv).Text(base) + bigIntPool.Put(vv) + return r +} + +// ToBigInt returns z as a big.Int in Montgomery form +func (z *Element) ToBigInt(res *big.Int) *big.Int { + var b [Limbs * 8]byte + binary.BigEndian.PutUint64(b[40:48], z[0]) + binary.BigEndian.PutUint64(b[32:40], z[1]) + binary.BigEndian.PutUint64(b[24:32], z[2]) + binary.BigEndian.PutUint64(b[16:24], z[3]) + binary.BigEndian.PutUint64(b[8:16], z[4]) + binary.BigEndian.PutUint64(b[0:8], z[5]) + + return res.SetBytes(b[:]) +} + +// ToBigIntRegular returns z as a big.Int in regular form +func (z Element) ToBigIntRegular(res *big.Int) *big.Int { + z.FromMont() + return z.ToBigInt(res) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. 
+func (z *Element) Bytes() (res [Limbs * 8]byte) { + _z := z.ToRegular() + binary.BigEndian.PutUint64(res[40:48], _z[0]) + binary.BigEndian.PutUint64(res[32:40], _z[1]) + binary.BigEndian.PutUint64(res[24:32], _z[2]) + binary.BigEndian.PutUint64(res[16:24], _z[3]) + binary.BigEndian.PutUint64(res[8:16], _z[4]) + binary.BigEndian.PutUint64(res[0:8], _z[5]) + + return +} + +// Marshal returns the regular (non montgomery) value +// of z as a big-endian byte slice. +func (z *Element) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// SetBytes interprets e as the bytes of a big-endian unsigned integer, +// sets z to that value (in Montgomery form), and returns z. +func (z *Element) SetBytes(e []byte) *Element { + // get a big int from our pool + vv := bigIntPool.Get().(*big.Int) + vv.SetBytes(e) + + // set big int + z.SetBigInt(vv) + + // put temporary object back in pool + bigIntPool.Put(vv) + + return z +} + +// SetBigInt sets z to v (regular form) and returns z in Montgomery form +func (z *Element) SetBigInt(v *big.Int) *Element { + z.SetZero() + + var zero big.Int + + // fast path + c := v.Cmp(&_modulus) + if c == 0 { + // v == 0 + return z + } else if c != 1 && v.Cmp(&zero) != -1 { + // 0 < v < q + return z.setBigInt(v) + } + + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + // copy input + modular reduction + vv.Set(v) + vv.Mod(v, &_modulus) + + // set big int byte value + z.setBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + return z +} + +// setBigInt assumes 0 ⩽ v < q +func (z *Element) setBigInt(v *big.Int) *Element { + vBits := v.Bits() + + if bits.UintSize == 64 { + for i := 0; i < len(vBits); i++ { + z[i] = uint64(vBits[i]) + } + } else { + for i := 0; i < len(vBits); i++ { + if i%2 == 0 { + z[i/2] = uint64(vBits[i]) + } else { + z[i/2] |= uint64(vBits[i]) << 32 + } + } + } + + return z.ToMont() +} + +// SetString creates a big.Int with number and calls SetBigInt on z +// +// The number prefix 
determines the actual base: A prefix of +// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8, +// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10 +// and no prefix is accepted. +// +// For base 16, lower and upper case letters are considered the same: +// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15. +// +// An underscore character ''_'' may appear between a base +// prefix and an adjacent digit, and between successive digits; such +// underscores do not change the value of the number. +// Incorrect placement of underscores is reported as a panic if there +// are no other errors. +// +func (z *Element) SetString(number string) *Element { + // get temporary big int from the pool + vv := bigIntPool.Get().(*big.Int) + + if _, ok := vv.SetString(number, 0); !ok { + panic("Element.SetString failed -> can't parse number into a big.Int " + number) + } + + z.SetBigInt(vv) + + // release object into pool + bigIntPool.Put(vv) + + return z +} + +// MarshalJSON returns json encoding of z (z.Text(10)) +// If z == nil, returns null +func (z *Element) MarshalJSON() ([]byte, error) { + if z == nil { + return []byte("null"), nil + } + const maxSafeBound = 15 // we encode it as number if it's small + s := z.Text(10) + if len(s) <= maxSafeBound { + return []byte(s), nil + } + var sbb strings.Builder + sbb.WriteByte('"') + sbb.WriteString(s) + sbb.WriteByte('"') + return []byte(sbb.String()), nil +} + +// UnmarshalJSON accepts numbers and strings as input +// See Element.SetString for valid prefixes (0x, 0b, ...) 
func (z *Element) UnmarshalJSON(data []byte) error {
	s := string(data)
	if len(s) > Bits*3 {
		return errors.New("value too large (max = Element.Bits * 3)")
	}

	// we accept numbers and strings, remove leading and trailing quotes if any
	// NOTE(review): an unbalanced quote (only leading or only trailing) is also
	// silently stripped — confirm this is intended
	if len(s) > 0 && s[0] == '"' {
		s = s[1:]
	}
	if len(s) > 0 && s[len(s)-1] == '"' {
		s = s[:len(s)-1]
	}

	// get temporary big int from the pool
	vv := bigIntPool.Get().(*big.Int)

	if _, ok := vv.SetString(s, 0); !ok {
		return errors.New("can't parse into a big.Int: " + s)
	}

	z.SetBigInt(vv)

	// release object into pool
	bigIntPool.Put(vv)
	return nil
}

// Legendre returns the Legendre symbol of z (either +1, -1, or 0.)
func (z *Element) Legendre() int {
	var l Element
	// z^((q-1)/2)
	l.expByLegendreExp(*z)

	if l.IsZero() {
		return 0
	}

	// if l == 1 (the limbs below are 1 in Montgomery form)
	if (l[5] == 28498675542444634) && (l[4] == 13356930855120736188) && (l[3] == 8832319421896135475) && (l[2] == 7242180086616818316) && (l[1] == 10045892448872562649) && (l[0] == 1481365419032838079) {
		return 1
	}
	return -1
}

// Sqrt z = √x mod q
// if the square root doesn't exist (x is not a square mod q)
// Sqrt leaves z unchanged and returns nil
func (z *Element) Sqrt(x *Element) *Element {
	// q ≡ 1 (mod 4)
	// see modSqrtTonelliShanks in math/big/int.go
	// using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf

	var y, b, t, w Element
	// w = x^((s-1)/2))
	w.expBySqrtExp(*x)

	// y = x^((s+1)/2)) = w * x
	y.Mul(x, &w)

	// b = x^s = w * w * x = y * x
	b.Mul(&w, &y)

	// g = nonResidue ^ s
	var g = Element{
		15655215628902554004,
		15894127656167592378,
		9702012166408397168,
		12335982559306940759,
		1313802173610541430,
		81629743607937133,
	}
	r := uint64(41)

	// compute legendre symbol
	// t = x^((q-1)/2) = r-1 squaring of x^s
	t = b
	for i := uint64(0); i < r-1; i++ {
		t.Square(&t)
	}
	if t.IsZero() {
		return z.SetZero()
	}
	// the limbs below are 1 in Montgomery form
	if !((t[5] == 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) {
		// t != 1, we don't have a square root
		return nil
	}
	// Tonelli-Shanks main loop: each pass strictly decreases r, so it terminates
	for {
		var m uint64
		t = b

		// for t != 1 (same Montgomery-form-1 limbs as above)
		for !((t[5] == 28498675542444634) && (t[4] == 13356930855120736188) && (t[3] == 8832319421896135475) && (t[2] == 7242180086616818316) && (t[1] == 10045892448872562649) && (t[0] == 1481365419032838079)) {
			t.Square(&t)
			m++
		}

		if m == 0 {
			return z.Set(&y)
		}
		// t = g^(2^(r-m-1)) mod q
		ge := int(r - m - 1)
		t = g
		for ge > 0 {
			t.Square(&t)
			ge--
		}

		g.Square(&t)
		y.Mul(&y, &t)
		b.Mul(&b, &g)
		r = m
	}
}

func max(a int, b int) int {
	if a > b {
		return a
	}
	return b
}

func min(a int, b int) int {
	if a < b {
		return a
	}
	return b
}

const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1)
const updateFactorIdentityMatrixRow0 = 1
const updateFactorIdentityMatrixRow1 = 1 << 32

// updateFactorsDecompose splits a packed update-factor pair c = f + 2³² g
// (both biased by 2³¹ - 1) back into the signed halves f and g
func updateFactorsDecompose(c int64) (int64, int64) {
	c += updateFactorsConversionBias
	const low32BitsFilter int64 = 0xFFFFFFFF
	f := c&low32BitsFilter - 0x7FFFFFFF
	g := c>>32&low32BitsFilter - 0x7FFFFFFF
	return f, g
}

const k = 32 // word size / 2
const signBitSelector = uint64(1) << 63
const approxLowBitsN = k - 1
const approxHighBitsN = k + 1
const inversionCorrectionFactorWord0 = 851295657643717122
const inversionCorrectionFactorWord1 = 10857859049187504913
const inversionCorrectionFactorWord2 = 7148604188520083019
const inversionCorrectionFactorWord3 = 1138623559447261654
const inversionCorrectionFactorWord4 = 1203095380280779597
const inversionCorrectionFactorWord5 = 148579538565968037

const invIterationsN = 26

// Inverse z = x⁻¹ mod q
// Implements "Optimized Binary GCD for Modular Inversion"
// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf
func (z *Element) Inverse(x *Element) *Element {
	if x.IsZero() {
		z.SetZero()
		return z
	}

	a := *x
	b := Element{
		qElementWord0,
		qElementWord1,
		qElementWord2,
		qElementWord3,
		qElementWord4,
		qElementWord5,
	} // b := q

	u := Element{1}

	// Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v]
	// c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1)
	var c0, c1 int64

	// Saved update factors to reduce the number of field multiplications
	var pf0, pf1, pg0, pg1 int64

	var i uint

	var v, s Element

	// Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations
	// This also lets us get away with half as many updates to u,v
	// To make this constant-time-ish, replace the condition with i < invIterationsN
	for i = 0; i&1 == 1 || !a.IsZero(); i++ {
		n := max(a.BitLen(), b.BitLen())
		aApprox, bApprox := approximate(&a, n), approximate(&b, n)

		// After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰
		// f0, g0, f1, g1 = 1, 0, 0, 1
		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1

		for j := 0; j < approxLowBitsN; j++ {

			if aApprox&1 == 0 {
				aApprox /= 2
			} else {
				// note: s here shadows the Element s declared above (uint64 in this scope)
				s, borrow := bits.Sub64(aApprox, bApprox, 0)
				if borrow == 1 {
					s = bApprox - aApprox
					bApprox = aApprox
					c0, c1 = c1, c0
				}

				aApprox = s / 2
				c0 = c0 - c1

				// Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹
				// |f₁| ≤ 2ʲ still
			}

			c1 *= 2
			// |f₁| ≤ 2ʲ⁺¹
		}

		s = a

		var g0 int64
		// from this point on c0 aliases for f₀
		c0, g0 = updateFactorsDecompose(c0)
		aHi := a.linearCombNonModular(&s, c0, &b, g0)
		if aHi&signBitSelector != 0 {
			// if aHi < 0
			c0, g0 = -c0, -g0
			aHi = a.neg(&a, aHi)
		}
		// right-shift a by k-1 bits
		a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN)
		a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN)
		a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN)
		a[3] = (a[3] >> approxLowBitsN) | ((a[4]) << approxHighBitsN)
		a[4] = (a[4] >> approxLowBitsN) | ((a[5]) << approxHighBitsN)
		a[5] = (a[5] >> approxLowBitsN) | (aHi << approxHighBitsN)

		var f1 int64
		// from this point on c1 aliases for g₁
		f1, c1 = updateFactorsDecompose(c1)
		bHi := b.linearCombNonModular(&s, f1, &b, c1)
		if bHi&signBitSelector != 0 {
			// if bHi < 0
			f1, c1 = -f1, -c1
			bHi = b.neg(&b, bHi)
		}
		// right-shift b by k-1 bits
		b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN)
		b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN)
		b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN)
		b[3] = (b[3] >> approxLowBitsN) | ((b[4]) << approxHighBitsN)
		b[4] = (b[4] >> approxLowBitsN) | ((b[5]) << approxHighBitsN)
		b[5] = (b[5] >> approxLowBitsN) | (bHi << approxHighBitsN)

		if i&1 == 1 {
			// Combine current update factors with previously stored ones
			// [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁]
			// We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1}
			// Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹
			// Which leaves us with an extra bit for the sign

			// c0 aliases f0, c1 aliases g1
			c0, g0, f1, c1 = c0*pf0+g0*pf1,
				c0*pg0+g0*pg1,
				f1*pf0+c1*pf1,
				f1*pg0+c1*pg1

			s = u
			u.linearCombSosSigned(&u, c0, &v, g0)
			v.linearCombSosSigned(&s, f1, &v, c1)

		} else {
			// Save update factors
			pf0, pg0, pf1, pg1 = c0, g0, f1, c1
		}
	}

	// For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻²
	const pSq int64 = 1 << (2 * (k - 1))
	// If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly)
	for ; i < invIterationsN; i += 2 {
		v.mulWSigned(&v, pSq)
	}

	// multiply by the precomputed correction factor to cancel the accumulated powers of 2
	z.Mul(&v, &Element{
		inversionCorrectionFactorWord0,
		inversionCorrectionFactorWord1,
		inversionCorrectionFactorWord2,
		inversionCorrectionFactorWord3,
		inversionCorrectionFactorWord4,
		inversionCorrectionFactorWord5,
	})
	return z
}

// approximate a big number x into a single 64 bit word using its uppermost
and lowermost bits
// if x fits in a word as is, no approximation necessary
func approximate(x *Element, nBits int) uint64 {

	if nBits <= 64 {
		return x[0]
	}

	const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones
	lo := mask & x[0]

	hiWordIndex := (nBits - 1) / 64

	hiWordBitsAvailable := nBits - hiWordIndex*64
	hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN)

	mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1))
	hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable)

	// fill the remaining middle bits from the word below the top one
	mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1)
	mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed

	return lo | mid | hi
}

// linearCombSosSigned z = (x*xC + y*yC) mod q with signed word coefficients
func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) {
	hi := z.linearCombNonModular(x, xC, y, yC)
	z.montReduceSigned(z, hi)
}

// montReduceSigned SOS algorithm; xHi must be at most 63 bits long. Last bit of xHi may be used as a sign bit
func (z *Element) montReduceSigned(x *Element, xHi uint64) {

	const signBitRemover = ^signBitSelector
	neg := xHi&signBitSelector != 0
	// the SOS implementation requires that most significant bit is 0
	// Let X be xHi*r + x
	// note that if X is negative we would have initially stored it as 2⁶⁴ r + X
	xHi &= signBitRemover
	// with this a negative X is now represented as 2⁶³ r + X

	var t [2*Limbs - 1]uint64
	var C uint64

	m := x[0] * qInvNegLsw

	C = madd0(m, qElementWord0, x[0])
	C, t[1] = madd2(m, qElementWord1, x[1], C)
	C, t[2] = madd2(m, qElementWord2, x[2], C)
	C, t[3] = madd2(m, qElementWord3, x[3], C)
	C, t[4] = madd2(m, qElementWord4, x[4], C)
	C, t[5] = madd2(m, qElementWord5, x[5], C)

	// the high word of m * qElement[5] is at most 62 bits
	// x[5] + C is at most 65 bits (high word at most 1 bit)
	// Thus the resulting C will be at most 63 bits
	t[6] = xHi + C
	// xHi and C are 63 bits, therefore no overflow

	{
		const i = 1
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)

		t[i+Limbs] += C
	}
	{
		const i = 2
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)

		t[i+Limbs] += C
	}
	{
		const i = 3
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)

		t[i+Limbs] += C
	}
	{
		const i = 4
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)
		C, t[i+4] = madd2(m, qElementWord4, t[i+4], C)
		C, t[i+5] = madd2(m, qElementWord5, t[i+5], C)

		t[i+Limbs] += C
	}
	{
		const i = 5
		// last round writes straight into z
		m := t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, z[0] = madd2(m, qElementWord1, t[i+1], C)
		C, z[1] = madd2(m, qElementWord2, t[i+2], C)
		C, z[2] = madd2(m, qElementWord3, t[i+3], C)
		C, z[3] = madd2(m, qElementWord4, t[i+4], C)
		z[5], z[4] = madd2(m, qElementWord5, t[i+5], C)
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[5] < 283357621510263184 || (z[5] == 283357621510263184 && (z[4] < 6038022134869067682 || (z[4] == 6038022134869067682 && (z[3] < 9513184293603517222 || (z[3] == 9513184293603517222 && (z[2] < 10956628289047010687 || (z[2] == 10956628289047010687 && (z[1] < 14886639130118979584 || (z[1] == 14886639130118979584 && (z[0] < 11045256207009841153))))))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 11045256207009841153, 0)
		z[1], b = bits.Sub64(z[1], 14886639130118979584, b)
		z[2], b = bits.Sub64(z[2], 10956628289047010687, b)
		z[3], b = bits.Sub64(z[3], 9513184293603517222, b)
		z[4], b = bits.Sub64(z[4], 6038022134869067682, b)
		z[5], _ = bits.Sub64(z[5], 283357621510263184, b)
	}
	if neg {
		// We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead
		var b uint64
		z[0], b = bits.Sub64(z[0], signBitSelector, 0)
		z[1], b = bits.Sub64(z[1], 0, b)
		z[2], b = bits.Sub64(z[2], 0, b)
		z[3], b = bits.Sub64(z[3], 0, b)
		z[4], b = bits.Sub64(z[4], 0, b)
		z[5], b = bits.Sub64(z[5], 0, b)

		// Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0
		if b != 0 {
			// z[5] = -1
			// negative: add q
			const neg1 = 0xFFFFFFFFFFFFFFFF

			b = 0
			z[0], b = bits.Add64(z[0], qElementWord0, b)
			z[1], b = bits.Add64(z[1], qElementWord1, b)
			z[2], b = bits.Add64(z[2], qElementWord2, b)
			z[3], b = bits.Add64(z[3], qElementWord3, b)
			z[4], b = bits.Add64(z[4], qElementWord4, b)
			z[5], _ = bits.Add64(neg1, qElementWord5, b)
		}
	}
}

// mulWSigned mul word signed (w/ montgomery reduction)
func (z *Element) mulWSigned(x *Element, y int64) {
	m := y >> 63
	_mulWGeneric(z, x, uint64((y^m)-m))
	// multiply by abs(y)
	if y < 0 {
		z.Neg(z)
	}
}

// neg z = -x (two's complement over the 7-word value x, xHi); returns the negated high word
func (z *Element) neg(x *Element, xHi uint64) uint64 {
	var b uint64

	z[0], b = bits.Sub64(0, x[0], 0)
	z[1], b = bits.Sub64(0, x[1], b)
	z[2], b = bits.Sub64(0, x[2], b)
	z[3], b = bits.Sub64(0, x[3], b)
	z[4], b = bits.Sub64(0, x[4], b)
	z[5], b = bits.Sub64(0, x[5], b)
	xHi, _ = bits.Sub64(0, xHi, b)

	return xHi
}

// regular multiplication by one word regular (non montgomery)
// Fewer additions than the branch-free for positive y.
Could be faster on some architectures
func (z *Element) mulWRegular(x *Element, y int64) uint64 {

	// w := abs(y)
	m := y >> 63
	w := uint64((y ^ m) - m)

	var c uint64
	c, z[0] = bits.Mul64(x[0], w)
	c, z[1] = madd1(x[1], w, c)
	c, z[2] = madd1(x[2], w, c)
	c, z[3] = madd1(x[3], w, c)
	c, z[4] = madd1(x[4], w, c)
	c, z[5] = madd1(x[5], w, c)

	// restore the sign by negating the whole 7-word product
	if y < 0 {
		c = z.neg(z, c)
	}

	return c
}

/*
Removed: seems slower
// mulWRegular branch-free regular multiplication by one word (non montgomery)
func (z *Element) mulWRegularBf(x *Element, y int64) uint64 {

	w := uint64(y)
	allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w

	// s[0], s[1] so results are not stored immediately in z.
	// x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z
	var s [2]uint64
	var h [2]uint64

	h[0], s[0] = bits.Mul64(x[0], w)

	c := uint64(0)
	b := uint64(0)

	{
		const curI = 1 % 2
		const prevI = 1 - curI
		const iMinusOne = 1 - 1

		h[curI], s[curI] = bits.Mul64(x[1], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 2 % 2
		const prevI = 1 - curI
		const iMinusOne = 2 - 1

		h[curI], s[curI] = bits.Mul64(x[2], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 3 % 2
		const prevI = 1 - curI
		const iMinusOne = 3 - 1

		h[curI], s[curI] = bits.Mul64(x[3], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 4 % 2
		const prevI = 1 - curI
		const iMinusOne = 4 - 1

		h[curI], s[curI] = bits.Mul64(x[4], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}

	{
		const curI = 5 % 2
		const prevI = 1 - curI
		const iMinusOne = 5 - 1

		h[curI], s[curI] = bits.Mul64(x[5], w)
		s[curI], c = bits.Add64(s[curI], h[prevI], c)
		s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]
	}
	{
		const curI = 6 % 2
		const prevI = 1 - curI
		const iMinusOne = 5

		s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b)
		z[iMinusOne] = s[prevI]

		return s[curI] + c
	}
}*/

// Requires NoCarry
// linearCombNonModular z = x*xC + y*yC without reduction; returns the high (7th) word
func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 {
	var yTimes Element

	yHi := yTimes.mulWRegular(y, yC)
	xHi := z.mulWRegular(x, xC)

	carry := uint64(0)
	z[0], carry = bits.Add64(z[0], yTimes[0], carry)
	z[1], carry = bits.Add64(z[1], yTimes[1], carry)
	z[2], carry = bits.Add64(z[2], yTimes[2], carry)
	z[3], carry = bits.Add64(z[3], yTimes[3], carry)
	z[4], carry = bits.Add64(z[4], yTimes[4], carry)
	z[5], carry = bits.Add64(z[5], yTimes[5], carry)

	yHi, _ = bits.Add64(xHi, yHi, carry)

	return yHi
}
diff --git a/ecc/bls12-378/fp/element_exp.go b/ecc/bls12-378/fp/element_exp.go
new file mode 100644
index 000000000..68439a4d7
--- /dev/null
+++ b/ecc/bls12-378/fp/element_exp.go
@@ -0,0 +1,1040 @@
// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// expBySqrtExp is equivalent to z.Exp(x, fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) << 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // return ((_101001 + i386) << 6 + _101) << 3 + // + // 
Operations: 330 squares 67 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = 
x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; 
s++ { + t15.Square(t15) + } + + // Step 150: t15 = x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } + + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 
303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 397: z = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 + for s := 0; s < 3; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _11 = 1 + _10 + // _101 = _10 + _11 + // _110 = 1 + _101 + // _1001 = _11 + _110 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // _1101 = 1 + _1100 + // _10001 = _101 + _1100 + // _10011 = _10 + _10001 + // _10101 = _10 + _10011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _100011 = _110 + _11101 + // _100111 = _1100 + _11011 + // _101001 = _10 + _100111 + // _110101 = _1100 + _101001 + // _110111 = _10 + _110101 + // _111001 = _10 + _110111 + // _111011 = _10 + _111001 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1111100 = _111101 + _111111 + // _1111111 = _11 + _1111100 + // i39 = ((_1111100 << 4 + _11101) << 3 + _11) 
<< 6 + // i57 = ((1 + i39) << 9 + _1011) << 6 + _1101 + // i78 = ((i57 << 9 + _10011) << 7 + _100011) << 3 + // i98 = ((1 + i78) << 11 + _101001) << 6 + _111001 + // i117 = ((i98 << 7 + _110101) << 4 + _1101) << 6 + // i138 = ((_1001 + i117) << 12 + _111011) << 6 + _10001 + // i162 = ((i138 << 11 + _111101) << 6 + _101) << 5 + // i184 = ((1 + i162) << 11 + _1011) << 8 + _111101 + // i205 = ((i184 << 6 + _11011) << 8 + _100011) << 5 + // i227 = ((_10001 + i205) << 12 + _100011) << 7 + _10011 + // i257 = ((i227 << 6 + _10011) << 13 + _110111) << 9 + // i279 = ((_11011 + i257) << 9 + _1101) << 10 + _101001 + // i299 = ((i279 << 8 + _100111) << 2 + 1) << 8 + // i311 = ((_1111111 + i299) << 2 + _11) << 7 + _11101 + // i331 = ((i311 << 3 + 1) << 8 + _1111111) << 7 + // i350 = ((_111011 + i331) << 6 + _10101) << 10 + _10001 + // i386 = ((i350 << 3 + _11) << 23 + _10011) << 8 + // i399 = ((_101001 + i386) << 6 + _101) << 4 + 1 + // return i399 << 40 + // + // Operations: 371 squares 68 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + t12 = new(Element) + t13 = new(Element) + t14 = new(Element) + t15 = new(Element) + t16 = new(Element) + t17 = new(Element) + t18 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18 Element + // Step 1: t6 = x^0x2 + t6.Square(&x) + + // Step 2: t2 = x^0x3 + t2.Mul(&x, t6) + + // Step 3: z = x^0x5 + z.Mul(t6, t2) + + // Step 4: t0 = x^0x6 + t0.Mul(&x, z) + + // Step 5: t15 = x^0x9 + t15.Mul(t2, t0) + + // Step 6: t14 = x^0xb + t14.Mul(t6, t15) + + // Step 7: t5 = x^0xc + t5.Mul(&x, t14) + + // Step 8: t9 = x^0xd + t9.Mul(&x, t5) + + // Step 9: t3 = x^0x11 + t3.Mul(z, t5) + + // Step 10: t1 = x^0x13 + t1.Mul(t6, t3) + + // Step 11: t4 = x^0x15 + t4.Mul(t6, t1) + + // Step 12: t10 = x^0x1b + t10.Mul(t0, t4) + + // Step 13: t7 = x^0x1d + t7.Mul(t6, t10) + + // Step 14: t12 = x^0x23 + t12.Mul(t0, t7) + + // Step 15: t8 = x^0x27 + t8.Mul(t5, t10) + + // Step 16: t0 = x^0x29 + t0.Mul(t6, t8) + + // Step 17: t16 = x^0x35 + t16.Mul(t5, t0) + + // Step 18: t11 = x^0x37 + t11.Mul(t6, t16) + + // Step 19: t17 = x^0x39 + t17.Mul(t6, t11) + + // Step 20: t5 = x^0x3b + t5.Mul(t6, t17) + + // Step 21: t13 = x^0x3d + t13.Mul(t6, t5) + + // Step 22: t6 = x^0x3f + t6.Mul(t6, t13) + + // Step 23: t18 = x^0x7c + t18.Mul(t13, t6) + + // Step 24: t6 = x^0x7f + t6.Mul(t2, t18) + + // Step 28: t18 = x^0x7c0 + for s := 0; s < 4; s++ { + t18.Square(t18) + } + + // Step 29: t18 = x^0x7dd + t18.Mul(t7, t18) + + // Step 32: t18 = x^0x3ee8 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 33: t18 = x^0x3eeb + t18.Mul(t2, t18) + + // Step 39: t18 = x^0xfbac0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 40: t18 = x^0xfbac1 + t18.Mul(&x, t18) + + // Step 49: t18 = x^0x1f758200 + 
for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 50: t18 = x^0x1f75820b + t18.Mul(t14, t18) + + // Step 56: t18 = x^0x7dd6082c0 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 57: t18 = x^0x7dd6082cd + t18.Mul(t9, t18) + + // Step 66: t18 = x^0xfbac1059a00 + for s := 0; s < 9; s++ { + t18.Square(t18) + } + + // Step 67: t18 = x^0xfbac1059a13 + t18.Mul(t1, t18) + + // Step 74: t18 = x^0x7dd6082cd0980 + for s := 0; s < 7; s++ { + t18.Square(t18) + } + + // Step 75: t18 = x^0x7dd6082cd09a3 + t18.Mul(t12, t18) + + // Step 78: t18 = x^0x3eeb0416684d18 + for s := 0; s < 3; s++ { + t18.Square(t18) + } + + // Step 79: t18 = x^0x3eeb0416684d19 + t18.Mul(&x, t18) + + // Step 90: t18 = x^0x1f75820b34268c800 + for s := 0; s < 11; s++ { + t18.Square(t18) + } + + // Step 91: t18 = x^0x1f75820b34268c829 + t18.Mul(t0, t18) + + // Step 97: t18 = x^0x7dd6082cd09a320a40 + for s := 0; s < 6; s++ { + t18.Square(t18) + } + + // Step 98: t17 = x^0x7dd6082cd09a320a79 + t17.Mul(t17, t18) + + // Step 105: t17 = x^0x3eeb0416684d19053c80 + for s := 0; s < 7; s++ { + t17.Square(t17) + } + + // Step 106: t16 = x^0x3eeb0416684d19053cb5 + t16.Mul(t16, t17) + + // Step 110: t16 = x^0x3eeb0416684d19053cb50 + for s := 0; s < 4; s++ { + t16.Square(t16) + } + + // Step 111: t16 = x^0x3eeb0416684d19053cb5d + t16.Mul(t9, t16) + + // Step 117: t16 = x^0xfbac1059a1346414f2d740 + for s := 0; s < 6; s++ { + t16.Square(t16) + } + + // Step 118: t15 = x^0xfbac1059a1346414f2d749 + t15.Mul(t15, t16) + + // Step 130: t15 = x^0xfbac1059a1346414f2d749000 + for s := 0; s < 12; s++ { + t15.Square(t15) + } + + // Step 131: t15 = x^0xfbac1059a1346414f2d74903b + t15.Mul(t5, t15) + + // Step 137: t15 = x^0x3eeb0416684d19053cb5d240ec0 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 138: t15 = x^0x3eeb0416684d19053cb5d240ed1 + t15.Mul(t3, t15) + + // Step 149: t15 = x^0x1f75820b34268c829e5ae920768800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 150: t15 = 
x^0x1f75820b34268c829e5ae92076883d + t15.Mul(t13, t15) + + // Step 156: t15 = x^0x7dd6082cd09a320a796ba481da20f40 + for s := 0; s < 6; s++ { + t15.Square(t15) + } + + // Step 157: t15 = x^0x7dd6082cd09a320a796ba481da20f45 + t15.Mul(z, t15) + + // Step 162: t15 = x^0xfbac1059a1346414f2d74903b441e8a0 + for s := 0; s < 5; s++ { + t15.Square(t15) + } + + // Step 163: t15 = x^0xfbac1059a1346414f2d74903b441e8a1 + t15.Mul(&x, t15) + + // Step 174: t15 = x^0x7dd6082cd09a320a796ba481da20f450800 + for s := 0; s < 11; s++ { + t15.Square(t15) + } + + // Step 175: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b + t14.Mul(t14, t15) + + // Step 183: t14 = x^0x7dd6082cd09a320a796ba481da20f45080b00 + for s := 0; s < 8; s++ { + t14.Square(t14) + } + + // Step 184: t13 = x^0x7dd6082cd09a320a796ba481da20f45080b3d + t13.Mul(t13, t14) + + // Step 190: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf40 + for s := 0; s < 6; s++ { + t13.Square(t13) + } + + // Step 191: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b + t13.Mul(t10, t13) + + // Step 199: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b00 + for s := 0; s < 8; s++ { + t13.Square(t13) + } + + // Step 200: t13 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23 + t13.Mul(t12, t13) + + // Step 205: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6460 + for s := 0; s < 5; s++ { + t13.Square(t13) + } + + // Step 206: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471 + t13.Mul(t3, t13) + + // Step 218: t13 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471000 + for s := 0; s < 12; s++ { + t13.Square(t13) + } + + // Step 219: t12 = x^0x3eeb0416684d19053cb5d240ed107a284059eb6471023 + t12.Mul(t12, t13) + + // Step 226: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881180 + for s := 0; s < 7; s++ { + t12.Square(t12) + } + + // Step 227: t12 = x^0x1f75820b34268c829e5ae92076883d14202cf5b23881193 + t12.Mul(t1, t12) + + // Step 233: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464c0 + for s := 0; s < 6; s++ { + t12.Square(t12) + } 
+ + // Step 234: t12 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d3 + t12.Mul(t1, t12) + + // Step 247: t12 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6000 + for s := 0; s < 13; s++ { + t12.Square(t12) + } + + // Step 248: t11 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a6037 + t11.Mul(t11, t12) + + // Step 257: t11 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e00 + for s := 0; s < 9; s++ { + t11.Square(t11) + } + + // Step 258: t10 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b + t10.Mul(t10, t11) + + // Step 267: t10 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc3600 + for s := 0; s < 9; s++ { + t10.Square(t10) + } + + // Step 268: t9 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d + t9.Mul(t9, t10) + + // Step 278: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83400 + for s := 0; s < 10; s++ { + t9.Square(t9) + } + + // Step 279: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429 + t9.Mul(t0, t9) + + // Step 287: t9 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342900 + for s := 0; s < 8; s++ { + t9.Square(t9) + } + + // Step 288: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d8342927 + t8.Mul(t8, t9) + + // Step 290: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49c + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 291: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d + t8.Mul(&x, t8) + + // Step 299: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d00 + for s := 0; s < 8; s++ { + t8.Square(t8) + } + + // Step 300: t8 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7f + t8.Mul(t6, t8) + + // Step 302: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275fc + for s := 0; s < 2; s++ { + t8.Square(t8) + } + + // Step 303: t8 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff + t8.Mul(t2, t8) + + // 
Step 310: t8 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff80 + for s := 0; s < 7; s++ { + t8.Square(t8) + } + + // Step 311: t7 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d + t7.Mul(t7, t8) + + // Step 314: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce8 + for s := 0; s < 3; s++ { + t7.Square(t7) + } + + // Step 315: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce9 + t7.Mul(&x, t7) + + // Step 323: t7 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce900 + for s := 0; s < 8; s++ { + t7.Square(t7) + } + + // Step 324: t6 = x^0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f + t6.Mul(t6, t7) + + // Step 331: t6 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bf80 + for s := 0; s < 7; s++ { + t6.Square(t6) + } + + // Step 332: t5 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb + t5.Mul(t5, t6) + + // Step 338: t5 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feec0 + for s := 0; s < 6; s++ { + t5.Square(t5) + } + + // Step 339: t4 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5 + t4.Mul(t4, t5) + + // Step 349: t4 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5400 + for s := 0; s < 10; s++ { + t4.Square(t4) + } + + // Step 350: t3 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411 + t3.Mul(t3, t4) + + // Step 353: t3 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa088 + for s := 0; s < 3; s++ { + t3.Square(t3) + } + + // Step 354: t2 = x^0xfbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b + t2.Mul(t2, t3) + + // Step 377: t2 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800000 + for s := 0; s < 23; s++ { + t2.Square(t2) + } + + // Step 378: t1 = 
x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed5045800013 + t1.Mul(t1, t2) + + // Step 386: t1 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001300 + for s := 0; s < 8; s++ { + t1.Square(t1) + } + + // Step 387: t0 = x^0x7dd6082cd09a320a796ba481da20f45080b3d6c8e20464d301b86c1a1493aff9d2feed504580001329 + t0.Mul(t0, t1) + + // Step 393: t0 = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 394: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca45 + z.Mul(z, t0) + + // Step 398: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca450 + for s := 0; s < 4; s++ { + z.Square(z) + } + + // Step 399: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca451 + z.Mul(&x, z) + + // Step 439: z = x^0x1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000 + for s := 0; s < 40; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bls12-378/fp/element_fuzz.go b/ecc/bls12-378/fp/element_fuzz.go new file mode 100644 index 000000000..0d948021a --- /dev/null +++ b/ecc/bls12-378/fp/element_fuzz.go @@ -0,0 +1,152 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[5] %= qElement[5] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + z[4], b = bits.Sub64(z[4], qElement[4], b) + z[5], b = bits.Sub64(z[5], qElement[5], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + 
if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git a/ecc/bls12-378/fp/element_mul_adx_amd64.s b/ecc/bls12-378/fp/element_mul_adx_amd64.s new file mode 100644 index 000000000..a6f902c36 --- /dev/null +++ b/ecc/bls12-378/fp/element_mul_adx_amd64.s @@ -0,0 +1,836 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), NOSPLIT, $0-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 
32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + 
ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + 
MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + 
// (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp 
registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +TEXT ·fromMont(SB), NOSPLIT, $0-8 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ 
DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + 
ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET diff --git a/ecc/bls12-378/fp/element_mul_amd64.s b/ecc/bls12-378/fp/element_mul_amd64.s new file mode 100644 index 000000000..171a75360 --- /dev/null +++ b/ecc/bls12-378/fp/element_mul_amd64.s @@ -0,0 +1,858 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $24-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), R8 + + // x[0] -> R10 + // x[1] -> R11 + // x[2] -> R12 + MOVQ 0(R8), R10 + MOVQ 8(R8), R11 + MOVQ 16(R8), R12 + MOVQ y+16(FP), R13 + + // A -> BP + // t[0] -> R14 + // t[1] -> R15 + // t[2] -> CX + // t[3] -> BX + // t[4] -> SI + // t[5] -> DI + // clear the flags + XORQ AX, AX + MOVQ 0(R13), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ R10, R14, R15 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R11, AX, CX + ADOXQ AX, R15 + + // (A,t[2]) := x[2]*y[0] + 
A + MULXQ R12, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ 24(R8), AX, SI + ADOXQ AX, BX + + // (A,t[4]) := x[4]*y[0] + A + MULXQ 32(R8), AX, DI + ADOXQ AX, SI + + // (A,t[5]) := x[5]*y[0] + A + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 8(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[1] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[1] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + 
ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 16(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[2] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[2] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 24(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + 
// (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[3] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[3] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 32(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[4] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[4] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[4] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[4] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[4] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[4] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), 
DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // clear the flags + XORQ AX, AX + MOVQ 40(R13), DX + + // (A,t[0]) := t[0] + x[0]*y[5] + A + MULXQ R10, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[5] + A + ADCXQ BP, R15 + MULXQ R11, AX, BP + ADOXQ AX, R15 + + // (A,t[2]) := t[2] + x[2]*y[5] + A + ADCXQ BP, CX + MULXQ R12, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[5] + A + ADCXQ BP, BX + MULXQ 24(R8), AX, BP + ADOXQ AX, BX + + // (A,t[4]) := t[4] + x[4]*y[5] + A + ADCXQ BP, SI + MULXQ 32(R8), AX, BP + ADOXQ AX, SI + + // (A,t[5]) := t[5] + x[5]*y[5] + A + ADCXQ BP, DI + MULXQ 40(R8), AX, BP + ADOXQ AX, DI + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R9 + ADCXQ R14, AX + MOVQ R9, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ 
q<>+40(SB), AX, DI + ADOXQ AX, SI + + // t[5] = C + A + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ BP, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R9,R8,R13,R10,R11,R12) + REDUCE(R14,R15,CX,BX,SI,DI,R9,R8,R13,R10,R11,R12) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $8-8 + NO_LOCAL_POINTERS + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R15 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + MOVQ 32(DX), SI + MOVQ 40(DX), DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 
+ ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ 
R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R15, R14 + MULXQ q<>+8(SB), AX, R15 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R15 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R15 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // (C,t[3]) := t[4] + m*q[4] + C + ADCXQ SI, BX + MULXQ q<>+32(SB), AX, SI + ADOXQ AX, BX + + // (C,t[4]) := t[5] + m*q[5] + C + ADCXQ DI, SI + MULXQ q<>+40(SB), AX, DI + ADOXQ AX, SI + MOVQ $0, AX + ADCXQ AX, DI + ADOXQ AX, DI + + // reduce element(R14,R15,CX,BX,SI,DI) using temp registers (R8,R9,R10,R11,R12,R13) + REDUCE(R14,R15,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R15, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + MOVQ SI, 32(AX) + MOVQ DI, 40(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bls12-378/fp/element_ops_amd64.go b/ecc/bls12-378/fp/element_ops_amd64.go new file mode 100644 index 000000000..73a3711ec --- /dev/null +++ b/ecc/bls12-378/fp/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bls12-378/fp/element_ops_amd64.s b/ecc/bls12-378/fp/element_ops_amd64.s new file mode 100644 index 000000000..97da07d77 --- /dev/null +++ b/ecc/bls12-378/fp/element_ops_amd64.s @@ -0,0 +1,452 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +// add(res, x, y *Element) +TEXT ·add(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 40(DX), R9 + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ res+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + XORQ R9, R9 + MOVQ x+8(FP), R8 + MOVQ 0(R8), AX + MOVQ 8(R8), DX + MOVQ 16(R8), CX + MOVQ 24(R8), BX + MOVQ 32(R8), SI + MOVQ 40(R8), DI + MOVQ y+16(FP), R8 + SUBQ 0(R8), AX + SBBQ 8(R8), DX + SBBQ 16(R8), CX + SBBQ 24(R8), BX + SBBQ 32(R8), SI + SBBQ 40(R8), DI + MOVQ $0x9948a20000000001, R10 + MOVQ $0xce97f76a822c0000, R11 + MOVQ $0x980dc360d0a49d7f, R12 + MOVQ $0x84059eb647102326, R13 + MOVQ $0x53cb5d240ed107a2, R14 + MOVQ 
$0x03eeb0416684d190, R15 + CMOVQCC R9, R10 + CMOVQCC R9, R11 + CMOVQCC R9, R12 + CMOVQCC R9, R13 + CMOVQCC R9, R14 + CMOVQCC R9, R15 + ADDQ R10, AX + ADCQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + ADCQ R15, DI + MOVQ res+0(FP), R8 + MOVQ AX, 0(R8) + MOVQ DX, 8(R8) + MOVQ CX, 16(R8) + MOVQ BX, 24(R8) + MOVQ SI, 32(R8) + MOVQ DI, 40(R8) + RET + +// double(res, x *Element) +TEXT ·double(SB), NOSPLIT, $0-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ res+0(FP), R15 + MOVQ DX, 0(R15) + MOVQ CX, 8(R15) + MOVQ BX, 16(R15) + MOVQ SI, 24(R15) + MOVQ DI, 32(R15) + MOVQ R8, 40(R15) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), R9 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + TESTQ AX, AX + JEQ l1 + MOVQ $0x9948a20000000001, R10 + SUBQ DX, R10 + MOVQ R10, 0(R9) + MOVQ $0xce97f76a822c0000, R10 + SBBQ CX, R10 + MOVQ R10, 8(R9) + MOVQ $0x980dc360d0a49d7f, R10 + SBBQ BX, R10 + MOVQ R10, 16(R9) + MOVQ $0x84059eb647102326, R10 + SBBQ SI, R10 + MOVQ R10, 24(R9) + MOVQ $0x53cb5d240ed107a2, R10 + SBBQ DI, R10 + MOVQ R10, 32(R9) + MOVQ $0x03eeb0416684d190, R10 + SBBQ R8, R10 + MOVQ R10, 40(R9) + RET + +l1: + MOVQ AX, 0(R9) + MOVQ AX, 8(R9) + MOVQ AX, 16(R9) + MOVQ AX, 24(R9) + MOVQ AX, 32(R9) + MOVQ AX, 40(R9) + RET + +TEXT ·reduce(SB), NOSPLIT, $0-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + 
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,R9,R10,R11,R12,R13) + REDUCE(DX,CX,BX,SI,DI,R8,R15,R9,R10,R11,R12,R13) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R14,R15,R9,R10,R11,R12) + REDUCE(DX,CX,BX,SI,DI,R8,R14,R15,R9,R10,R11,R12) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), $40-8 + MOVQ x+0(FP), AX + MOVQ 
0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ 32(AX), DI + MOVQ 40(AX), R8 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + REDUCE(DX,CX,BX,SI,DI,R8,R15,s0-8(SP),s1-16(SP),s2-24(SP),s3-32(SP),s4-40(SP)) + + MOVQ DX, R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ R15, DX + ADCQ s0-8(SP), CX + ADCQ s1-16(SP), BX + ADCQ s2-24(SP), SI + ADCQ s3-32(SP), DI + ADCQ s4-40(SP), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + ADCQ 32(AX), DI + ADCQ 40(AX), R8 + + // reduce element(DX,CX,BX,SI,DI,R8) using temp registers (R9,R10,R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + MOVQ DI, 32(AX) + MOVQ R8, 40(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), $48-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + MOVQ CX, R10 + MOVQ BX, R11 + MOVQ SI, R12 + MOVQ DI, R13 + MOVQ R8, R14 + MOVQ R9, R15 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + ADCQ 32(DX), R8 + ADCQ 
40(DX), R9 + SUBQ 0(DX), R10 + SBBQ 8(DX), R11 + SBBQ 16(DX), R12 + SBBQ 24(DX), R13 + SBBQ 32(DX), R14 + SBBQ 40(DX), R15 + MOVQ CX, s0-8(SP) + MOVQ BX, s1-16(SP) + MOVQ SI, s2-24(SP) + MOVQ DI, s3-32(SP) + MOVQ R8, s4-40(SP) + MOVQ R9, s5-48(SP) + MOVQ $0x9948a20000000001, CX + MOVQ $0xce97f76a822c0000, BX + MOVQ $0x980dc360d0a49d7f, SI + MOVQ $0x84059eb647102326, DI + MOVQ $0x53cb5d240ed107a2, R8 + MOVQ $0x03eeb0416684d190, R9 + CMOVQCC AX, CX + CMOVQCC AX, BX + CMOVQCC AX, SI + CMOVQCC AX, DI + CMOVQCC AX, R8 + CMOVQCC AX, R9 + ADDQ CX, R10 + ADCQ BX, R11 + ADCQ SI, R12 + ADCQ DI, R13 + ADCQ R8, R14 + ADCQ R9, R15 + MOVQ s0-8(SP), CX + MOVQ s1-16(SP), BX + MOVQ s2-24(SP), SI + MOVQ s3-32(SP), DI + MOVQ s4-40(SP), R8 + MOVQ s5-48(SP), R9 + MOVQ R10, 0(DX) + MOVQ R11, 8(DX) + MOVQ R12, 16(DX) + MOVQ R13, 24(DX) + MOVQ R14, 32(DX) + MOVQ R15, 40(DX) + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + MOVQ R8, 32(AX) + MOVQ R9, 40(AX) + RET diff --git a/ecc/bls12-378/fp/element_ops_noasm.go b/ecc/bls12-378/fp/element_ops_noasm.go new file mode 100644 index 000000000..fec628918 --- /dev/null +++ b/ecc/bls12-378/fp/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bls12-378/fp/element_test.go b/ecc/bls12-378/fp/element_test.go new file mode 100644 index 000000000..72b71ebc5 --- /dev/null +++ b/ecc/bls12-378/fp/element_test.go @@ -0,0 +1,2681 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fp + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement.SetOne() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 
13541478318970833666, + 5510290684934426267, + 8467587974331926354, + 13931463632695577534, + 3531303697457869800, + 51529254522778566, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 200 + nbFuzz = 1000 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[5]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[5]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + 
+func TestElementNegZero(t *testing.T) {
+	var a, b Element
+	b.SetZero()
+	for a.IsZero() {
+		a.SetRandom()
+	}
+	a.Neg(&b)
+	if !a.IsZero() {
+		t.Fatal("neg(0) != 0")
+	}
+}
+
+func TestElementReduce(t *testing.T) {
+	testValues := make([]Element, len(staticTestValues))
+	copy(testValues, staticTestValues)
+
+	for _, s := range testValues {
+		expected := s
+		reduce(&s)
+		_reduceGeneric(&expected)
+		if !s.Equal(&expected) {
+			t.Fatal("reduce failed: asm and generic impl don't match")
+		}
+	}
+
+	parameters := gopter.DefaultTestParameters()
+	if testing.Short() {
+		parameters.MinSuccessfulTests = nbFuzzShort
+	} else {
+		parameters.MinSuccessfulTests = nbFuzz
+	}
+
+	properties := gopter.NewProperties(parameters)
+
+	genA := genFull()
+
+	properties.Property("reduce should output a result smaller than modulus", prop.ForAll(
+		func(a Element) bool {
+			b := a
+			reduce(&a)
+			_reduceGeneric(&b)
+			return !a.biggerOrEqualModulus() && a.Equal(&b)
+		},
+		genA,
+	))
+
+	properties.TestingRun(t, gopter.ConsoleReporter(false))
+	// if we have ADX instruction enabled, test both path in assembly
+	if supportAdx {
+		t.Log("disabling ADX")
+		supportAdx = false
+		properties.TestingRun(t, gopter.ConsoleReporter(false))
+		supportAdx = true
+	}
+
+}
+
+func TestElementBytes(t *testing.T) {
+	parameters := gopter.DefaultTestParameters()
+	if testing.Short() {
+		parameters.MinSuccessfulTests = nbFuzzShort
+	} else {
+		parameters.MinSuccessfulTests = nbFuzz
+	}
+
+	properties := gopter.NewProperties(parameters)
+
+	genA := gen()
+
+	properties.Property("SetBytes(Bytes()) should stay constant", prop.ForAll(
+		func(a testPairElement) bool {
+			var b Element
+			bytes := a.element.Bytes()
+			b.SetBytes(bytes[:])
+			return a.element.Equal(&b)
+		},
+		genA,
+	))
+
+	properties.TestingRun(t, gopter.ConsoleReporter(false))
+}
+
+func TestElementInverseExp(t *testing.T) {
+	// by Fermat's little theorem, the inverse must equal a^(q-2); exp = q-2 below
+	exp := Modulus()
+	exp.Sub(exp, new(big.Int).SetUint64(2))
+
+	parameters := 
gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a 
testPairElement) bool {
+			var constant Element
+			constant.SetUint64(13)
+
+			b := a.element
+			b.Mul(&b, &constant)
+
+			MulBy13(&a.element)
+
+			return a.element.Equal(&b)
+		},
+		genA,
+	))
+
+	properties.TestingRun(t, gopter.ConsoleReporter(false))
+	// if we have ADX instruction enabled, test both path in assembly
+	if supportAdx {
+		t.Log("disabling ADX")
+		supportAdx = false
+		properties.TestingRun(t, gopter.ConsoleReporter(false))
+		supportAdx = true
+	}
+
+}
+
+func TestElementLegendre(t *testing.T) {
+	parameters := gopter.DefaultTestParameters()
+	if testing.Short() {
+		parameters.MinSuccessfulTests = nbFuzzShort
+	} else {
+		parameters.MinSuccessfulTests = nbFuzz
+	}
+
+	properties := gopter.NewProperties(parameters)
+
+	genA := gen()
+
+	properties.Property("legendre should output same result as big.Int.Jacobi", prop.ForAll(
+		func(a testPairElement) bool {
+			return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus())
+		},
+		genA,
+	))
+
+	properties.TestingRun(t, gopter.ConsoleReporter(false))
+	// if we have ADX instruction enabled, test both path in assembly
+	if supportAdx {
+		t.Log("disabling ADX")
+		supportAdx = false
+		properties.TestingRun(t, gopter.ConsoleReporter(false))
+		supportAdx = true
+	}
+
+}
+
+func TestElementButterflies(t *testing.T) {
+
+	parameters := gopter.DefaultTestParameters()
+	if testing.Short() {
+		parameters.MinSuccessfulTests = nbFuzzShort
+	} else {
+		parameters.MinSuccessfulTests = nbFuzz
+	}
+
+	properties := gopter.NewProperties(parameters)
+
+	genA := gen()
+
+	properties.Property("butterfly0 == a -b; a +b", prop.ForAll(
+		func(a, b testPairElement) bool {
+			a0, b0 := a.element, b.element
+
+			_butterflyGeneric(&a.element, &b.element)
+			Butterfly(&a0, &b0)
+
+			return a.element.Equal(&a0) && b.element.Equal(&b0)
+		},
+		genA,
+		genA,
+	))
+
+	properties.TestingRun(t, gopter.ConsoleReporter(false))
+	// if we have ADX instruction enabled, test both path in assembly
+	if supportAdx {
+		t.Log("disabling ADX")
+		supportAdx 
= false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, 
&b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + 
properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000", 16) + const sqrtExponentElement = "fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match Exp(1f75820b34268c829e5ae92076883d14202cf5b238811934c06e1b068524ebfe74bfbb5411600004ca4510000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + 
c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with 
int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := 
a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + 
err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, decoded, "element -> json -> element round trip failed") + + // decode hex and string values + withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}" + + var decodedS S + err = json.Unmarshal([]byte(withHexValues), &decodedS) + assert.NoError(err) + + assert.Equal(s, decodedS, " json with strings -> element failed") + +} + +type testPairElement struct { + element Element + bigint big.Int +} + +func (z *Element) biggerOrEqualModulus() bool { + if z[5] > qElement[5] { + return true + } + if z[5] < qElement[5] { + return false + } + + if z[4] > qElement[4] { + return true + } + if z[4] < qElement[4] { + return false + } + + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} + +func gen() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var g testPairElement + + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + + for g.element.biggerOrEqualModulus() { + g.element = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g.element[5] %= (qElement[5] + 1) + } + } + + g.element.ToBigIntRegular(&g.bigint) + genResult := gopter.NewGenResult(g, gopter.NoShrinker) + return genResult + } +} + +func genFull() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomFq := func() Element { + 
var g Element + + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + + for g.biggerOrEqualModulus() { + g = Element{ + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + genParams.NextUint64(), + } + if qElement[5] != ^uint64(0) { + g[5] %= (qElement[5] + 1) + } + } + + return g + } + a := genRandomFq() + + var carry uint64 + a[0], carry = bits.Add64(a[0], qElement[0], carry) + a[1], carry = bits.Add64(a[1], qElement[1], carry) + a[2], carry = bits.Add64(a[2], qElement[2], carry) + a[3], carry = bits.Add64(a[3], qElement[3], carry) + a[4], carry = bits.Add64(a[4], qElement[4], carry) + a[5], _ = bits.Add64(a[5], qElement[5], carry) + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func TestElementInversionApproximation(t *testing.T) { + var x Element + for i := 0; i < 1000; i++ { + x.SetRandom() + + // Normally small elements are unlikely. 
Here we give them a higher chance + xZeros := mrand.Int() % Limbs + for j := 1; j < xZeros; j++ { + x[Limbs-j] = 0 + } + + a := approximate(&x, x.BitLen()) + aRef := approximateRef(&x) + + if a != aRef { + t.Error("Approximation mismatch") + } + } +} + +func TestElementInversionCorrectionFactorFormula(t *testing.T) { + const kLimbs = k * Limbs + const power = kLimbs*6 + invIterationsN*(kLimbs-k+1) + factorInt := big.NewInt(1) + factorInt.Lsh(factorInt, power) + factorInt.Mod(factorInt, Modulus()) + + var refFactorInt big.Int + inversionCorrectionFactor := Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + } + inversionCorrectionFactor.ToBigInt(&refFactorInt) + + if refFactorInt.Cmp(factorInt) != 0 { + t.Error("mismatch") + } +} + +func TestElementLinearComb(t *testing.T) { + var x Element + var y Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + y.SetRandom() + testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63()) + } +} + +// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor. 
+func TestElementInversionCorrectionFactor(t *testing.T) { + + // (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x) + + var one Element + var oneInv Element + one.SetOne() + oneInv.Inverse(&one) + + for i := 0; i < 100; i++ { + var x Element + var xInv Element + x.SetRandom() + xInv.Inverse(&x) + + x.Mul(&x, &xInv) + if !x.Equal(&oneInv) { + t.Error("Correction factor is inconsistent") + } + } + + if !oneInv.Equal(&one) { + var i big.Int + oneInv.ToBigIntRegular(&i) // no montgomery + i.ModInverse(&i, Modulus()) + var fac Element + fac.setBigInt(&i) // back to montgomery + + var facTimesFac Element + facTimesFac.Mul(&fac, &Element{ + inversionCorrectionFactorWord0, + inversionCorrectionFactorWord1, + inversionCorrectionFactorWord2, + inversionCorrectionFactorWord3, + inversionCorrectionFactorWord4, + inversionCorrectionFactorWord5, + }) + + t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac) + } +} + +func TestElementBigNumNeg(t *testing.T) { + var a Element + aHi := a.neg(&a, 0) + if !a.IsZero() || aHi != 0 { + t.Error("-0 != 0") + } +} + +func TestElementBigNumWMul(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + w := mrand.Int63() + testBigNumWMul(t, &x, w) + } +} + +func TestElementVeryBigIntConversion(t *testing.T) { + xHi := mrand.Uint64() + var x Element + x.SetRandom() + var xInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + x.assertMatchVeryBigInt(t, xHi, &xInt) +} + +func TestElementMontReducePos(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector) + } +} + +func TestElementMontReduceNeg(t *testing.T) { + var x Element + + for i := 0; i < 1000; i++ { + x.SetRandom() + testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector) + } +} + +func TestElementMontNegMultipleOfR(t *testing.T) { + var zero Element + + for i := 0; i < 1000; i++ { + testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector) + } 
+} + +//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen +func TestUpdateFactorSubtraction(t *testing.T) { + for i := 0; i < 1000; i++ { + + f0, g0 := randomizeUpdateFactors() + f1, g1 := randomizeUpdateFactors() + + for f0-f1 > 1<<31 || f0-f1 <= -1<<31 { + f1 /= 2 + } + + for g0-g1 > 1<<31 || g0-g1 <= -1<<31 { + g1 /= 2 + } + + c0 := updateFactorsCompose(f0, g0) + c1 := updateFactorsCompose(f1, g1) + + cRes := c0 - c1 + fRes, gRes := updateFactorsDecompose(cRes) + + if fRes != f0-f1 || gRes != g0-g1 { + t.Error(i) + } + } +} + +func TestUpdateFactorsDouble(t *testing.T) { + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f > 1<<30 || f < (-1<<31+1)/2 { + f /= 2 + if g <= 1<<29 && g >= (-1<<31+1)/4 { + g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g + } + } + + if g > 1<<30 || g < (-1<<31+1)/2 { + g /= 2 + + if f <= 1<<29 && f >= (-1<<31+1)/4 { + f *= 2 //f was kept small on g's account. 
Now that we're halving g, we can double f + } + } + + c := updateFactorsCompose(f, g) + cD := c * 2 + fD, gD := updateFactorsDecompose(cD) + + if fD != 2*f || gD != 2*g { + t.Error(i) + } + } +} + +func TestUpdateFactorsNeg(t *testing.T) { + var fMistake bool + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + + if f == 0x80000000 || g == 0x80000000 { + // Update factors this large can only have been obtained after 31 iterations and will therefore never be negated + // We don't have capacity to store -2³¹ + // Repeat this iteration + i-- + continue + } + + c := updateFactorsCompose(f, g) + nc := -c + nf, ng := updateFactorsDecompose(nc) + fMistake = fMistake || nf != -f + if nf != -f || ng != -g { + t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X", + i, f, g, c, nc, nf, ng, f, g) + } + } + if fMistake { + t.Error("Mistake with f detected") + } else { + t.Log("All good with f") + } +} + +func TestUpdateFactorsNeg0(t *testing.T) { + c := updateFactorsCompose(0, 0) + t.Logf("c(0,0) = %X", c) + cn := -c + + if c != cn { + t.Error("Negation of zero update factors should yield the same result.") + } +} + +func TestUpdateFactorDecomposition(t *testing.T) { + var negSeen bool + + for i := 0; i < 1000; i++ { + + f, g := randomizeUpdateFactors() + + if f <= -(1<<31) || f > 1<<31 { + t.Fatal("f out of range") + } + + negSeen = negSeen || f < 0 + + c := updateFactorsCompose(f, g) + + fBack, gBack := updateFactorsDecompose(c) + + if f != fBack || g != gBack { + t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack) + } + } + + if !negSeen { + t.Fatal("No negative f factors") + } +} + +func TestUpdateFactorInitialValues(t *testing.T) { + + f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0) + f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1) + + if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 { + t.Error("Update factor initial value constants are incorrect") + } +} + +func 
TestUpdateFactorsRandomization(t *testing.T) { + var maxLen int + + //t.Log("|f| + |g| is not to exceed", 1 << 31) + for i := 0; i < 1000; i++ { + f, g := randomizeUpdateFactors() + lf, lg := abs64T32(f), abs64T32(g) + absSum := lf + lg + if absSum >= 1<<31 { + + if absSum == 1<<31 { + maxLen++ + } else { + t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum) + } + } + } + + if maxLen == 0 { + t.Error("max len not observed") + } else { + t.Log(maxLen, "maxLens observed") + } +} + +func randomizeUpdateFactor(absLimit uint32) int64 { + const maxSizeLikelihood = 10 + maxSize := mrand.Intn(maxSizeLikelihood) + + absLimit64 := int64(absLimit) + var f int64 + switch maxSize { + case 0: + f = absLimit64 + case 1: + f = -absLimit64 + default: + f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64 + } + + if f > 1<<31 { + return 1 << 31 + } else if f < -1<<31+1 { + return -1<<31 + 1 + } + + return f +} + +func abs64T32(f int64) uint32 { + if f >= 1<<32 || f < -1<<32 { + panic("f out of range") + } + + if f < 0 { + return uint32(-f) + } + return uint32(f) +} + +func randomizeUpdateFactors() (int64, int64) { + var f [2]int64 + b := mrand.Int() % 2 + + f[b] = randomizeUpdateFactor(1 << 31) + + //As per the paper, |f| + |g| \le 2³¹. 
+ f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b])) + + //Patching another edge case + if f[0]+f[1] == -1<<31 { + b = mrand.Int() % 2 + f[b]++ + } + + return f[0], f[1] +} + +func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) { + + var p1 big.Int + x.ToBigInt(&p1) + p1.Mul(&p1, big.NewInt(xC)) + + var p2 big.Int + y.ToBigInt(&p2) + p2.Mul(&p2, big.NewInt(yC)) + + p1.Add(&p1, &p2) + p1.Mod(&p1, Modulus()) + montReduce(&p1, &p1) + + var z Element + z.linearCombSosSigned(x, xC, y, yC) + z.assertMatchVeryBigInt(t, 0, &p1) +} + +func testBigNumWMul(t *testing.T, a *Element, c int64) { + var aHi uint64 + var aTimes Element + aHi = aTimes.mulWRegular(a, c) + + assertMulProduct(t, a, c, &aTimes, aHi) +} + +func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) { + var res Element + var xInt big.Int + var resInt big.Int + x.toVeryBigIntSigned(&xInt, xHi) + res.montReduceSigned(x, xHi) + montReduce(&resInt, &xInt) + res.assertMatchVeryBigInt(t, 0, &resInt) +} + +func updateFactorsCompose(f int64, g int64) int64 { + return f + g<<32 +} + +var rInv big.Int + +func montReduce(res *big.Int, x *big.Int) { + if rInv.BitLen() == 0 { // initialization + rInv.SetUint64(1) + rInv.Lsh(&rInv, Limbs*64) + rInv.ModInverse(&rInv, Modulus()) + } + res.Mul(x, &rInv) + res.Mod(res, Modulus()) +} + +func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) { + z.ToBigInt(i) + var upperWord big.Int + upperWord.SetUint64(xHi) + upperWord.Lsh(&upperWord, Limbs*64) + i.Add(&upperWord, i) +} + +func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) { + z.toVeryBigIntUnsigned(i, xHi) + if signBitSelector&xHi != 0 { + twosCompModulus := big.NewInt(1) + twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64) + i.Sub(i, twosCompModulus) + } +} + +func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int { + var xInt big.Int + x.ToBigInt(&xInt) + + xInt.Mul(&xInt, big.NewInt(c)) + + result.assertMatchVeryBigInt(t, 
resultHi, &xInt) + return xInt +} + +func assertMatch(t *testing.T, w []big.Word, a uint64, index int) { + + var wI big.Word + + if index < len(w) { + wI = w[index] + } + + const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize) + + a = a >> ((index * bits.UintSize) % 64) + a &= filter + + if uint64(wI) != a { + t.Error("Bignum mismatch: disagreement on word", index) + } +} + +func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) { + + var modulus big.Int + var aIntMod big.Int + modulus.SetInt64(1) + modulus.Lsh(&modulus, (Limbs+1)*64) + aIntMod.Mod(aInt, &modulus) + + words := aIntMod.Bits() + + const steps = 64 / bits.UintSize + for i := 0; i < Limbs*steps; i++ { + assertMatch(t, words, z[i/steps], i) + } + + for i := 0; i < steps; i++ { + assertMatch(t, words, aHi, Limbs*steps+i) + } +} + +func approximateRef(x *Element) uint64 { + + var asInt big.Int + x.ToBigInt(&asInt) + n := x.BitLen() + + if n <= 64 { + return asInt.Uint64() + } + + modulus := big.NewInt(1 << 31) + var lo big.Int + lo.Mod(&asInt, modulus) + + modulus.Lsh(modulus, uint(n-64)) + var hi big.Int + hi.Div(&asInt, modulus) + hi.Lsh(&hi, 31) + + hi.Add(&hi, &lo) + return hi.Uint64() +} diff --git a/ecc/bls12-378/fr/arith.go b/ecc/bls12-378/fr/arith.go new file mode 100644 index 000000000..83c9fd9ef --- /dev/null +++ b/ecc/bls12-378/fr/arith.go @@ -0,0 +1,60 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "math/bits" +) + +// madd0 hi = a*b + c (discards lo bits) +func madd0(a, b, c uint64) (hi uint64) { + var carry, lo uint64 + hi, lo = bits.Mul64(a, b) + _, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd1 hi, lo = a*b + c +func madd1(a, b, c uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +// madd2 hi, lo = a*b + c + d +func madd2(a, b, c, d uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, 0, carry) + return +} + +func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) { + var carry uint64 + hi, lo = bits.Mul64(a, b) + c, carry = bits.Add64(c, d, 0) + hi, _ = bits.Add64(hi, 0, carry) + lo, carry = bits.Add64(lo, c, 0) + hi, _ = bits.Add64(hi, e, carry) + return +} diff --git a/ecc/bls12-378/fr/asm.go b/ecc/bls12-378/fr/asm.go new file mode 100644 index 000000000..8241357c4 --- /dev/null +++ b/ecc/bls12-378/fr/asm.go @@ -0,0 +1,24 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import "golang.org/x/sys/cpu" + +var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 diff --git a/ecc/bls12-378/fr/asm_noadx.go b/ecc/bls12-378/fr/asm_noadx.go new file mode 100644 index 000000000..221beab93 --- /dev/null +++ b/ecc/bls12-378/fr/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bls12-378/fr/doc.go b/ecc/bls12-378/fr/doc.go new file mode 100644 index 000000000..2425cb964 --- /dev/null +++ b/ecc/bls12-378/fr/doc.go @@ -0,0 +1,43 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package fr contains field arithmetic operations for modulus = 0x20e7b9...000001. +// +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// +// The modulus is hardcoded in all the operations. +// +// Field elements are represented as an array, and assumed to be in Montgomery form in all methods: +// type Element [4]uint64 +// +// Example API signature +// // Mul z = x * y mod q +// func (z *Element) Mul(x, y *Element) *Element +// +// and can be used like so: +// var a, b Element +// a.SetUint64(2) +// b.SetString("984896738") +// a.Mul(a, b) +// a.Sub(a, a) +// .Add(a, b) +// .Inv(a) +// b.Exp(b, new(big.Int).SetUint64(42)) +// +// Modulus +// 0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da09400013291440000000001 // base 16 +// 14883435066912132899950318861128167269793560281114003360875131245101026639873 // base 10 +package fr diff --git a/ecc/bls12-378/fr/element.go b/ecc/bls12-378/fr/element.go new file mode 100644 index 000000000..b84a12c1b --- /dev/null +++ b/ecc/bls12-378/fr/element.go @@ -0,0 +1,1466 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Code generated by consensys/gnark-crypto DO NOT EDIT

package fr

// /!\ WARNING /!\
// this code has not been audited and is provided as-is. In particular,
// there is no security guarantees such as constant time implementation
// or side-channel attack resistance
// /!\ WARNING /!\

import (
	"crypto/rand"
	"encoding/binary"
	"errors"
	"io"
	"math/big"
	"math/bits"
	"reflect"
	"strconv"
	"strings"
	"sync"
)

// Element represents a field element stored on 4 words (uint64)
// Element are assumed to be in Montgomery form in all methods
// field modulus q =
//
// 14883435066912132899950318861128167269793560281114003360875131245101026639873
type Element [4]uint64

// Limbs number of 64 bits words needed to represent Element
const Limbs = 4

// Bits number bits needed to represent Element
const Bits = 254

// Bytes number bytes needed to represent Element
const Bytes = Limbs * 8

// field modulus stored as big.Int
var _modulus big.Int

// Modulus returns q as a big.Int
// q =
//
// 14883435066912132899950318861128167269793560281114003360875131245101026639873
func Modulus() *big.Int {
	return new(big.Int).Set(&_modulus)
}

// q (modulus), least-significant word first
const qElementWord0 uint64 = 3643768340310130689
const qElementWord1 uint64 = 16926637627159085057
const qElementWord2 uint64 = 9761692607219216639
const qElementWord3 uint64 = 2371068001496280753

var qElement = Element{
	qElementWord0,
	qElementWord1,
	qElementWord2,
	qElementWord3,
}

// Used for Montgomery reduction. (qInvNeg) q + r'.r = 1, i.e., qInvNeg = - q⁻¹ mod r
// (only the least significant word is needed by the CIOS/SOS reduction loops below)
const qInvNegLsw uint64 = 3643768340310130687

// rSquare = r² mod q; multiplying by it converts a regular value to Montgomery form
// (see SetUint64 / ToMont)
var rSquare = Element{
	1260465344847950704,
	15627634503313390135,
	1085346480195626314,
	405261321576397495,
}

// bigIntPool recycles big.Int scratch values used by the string/bytes conversions
var bigIntPool = sync.Pool{
	New: func() interface{} {
		return new(big.Int)
	},
}

func init() {
	_modulus.SetString("14883435066912132899950318861128167269793560281114003360875131245101026639873", 10)
}

// NewElement returns a new Element from a uint64 value
//
// it is equivalent to
// 		var v NewElement
// 		v.SetUint64(...)
func NewElement(v uint64) Element {
	z := Element{v}
	z.Mul(&z, &rSquare)
	return z
}

// SetUint64 sets z to v and returns z
func (z *Element) SetUint64(v uint64) *Element {
	// sets z LSB to v (non-Montgomery form) and convert z to Montgomery form
	*z = Element{v}
	return z.Mul(z, &rSquare) // z.ToMont()
}

// SetInt64 sets z to v and returns z
func (z *Element) SetInt64(v int64) *Element {

	// absolute value of v (branchless: m is all-ones iff v < 0)
	m := v >> 63
	z.SetUint64(uint64((v ^ m) - m))

	if m != 0 {
		// v is negative
		z.Neg(z)
	}

	return z
}

// Set z = x
func (z *Element) Set(x *Element) *Element {
	z[0] = x[0]
	z[1] = x[1]
	z[2] = x[2]
	z[3] = x[3]
	return z
}

// SetInterface converts provided interface into Element
// returns an error if provided type is not supported
// supported types: Element, *Element, uint64, int, string (interpreted as base10 integer),
// *big.Int, big.Int, []byte
func (z *Element) SetInterface(i1 interface{}) (*Element, error) {
	switch c1 := i1.(type) {
	case Element:
		return z.Set(&c1), nil
	case *Element:
		return z.Set(c1), nil
	case uint8:
		return z.SetUint64(uint64(c1)), nil
	case uint16:
		return z.SetUint64(uint64(c1)), nil
	case uint32:
		return z.SetUint64(uint64(c1)), nil
	case uint:
		return z.SetUint64(uint64(c1)), nil
	case uint64:
		return z.SetUint64(c1), nil
	case int8:
		return z.SetInt64(int64(c1)), nil
	case int16:
		return z.SetInt64(int64(c1)), nil
	case int32:
		return z.SetInt64(int64(c1)), nil
	case int64:
		return z.SetInt64(c1), nil
	case int:
		return z.SetInt64(int64(c1)), nil
	case string:
		return z.SetString(c1), nil
	case *big.Int:
		return z.SetBigInt(c1), nil
	case big.Int:
		return z.SetBigInt(&c1), nil
	case []byte:
		return z.SetBytes(c1), nil
	default:
		return nil, errors.New("can't set fr.Element from type " + reflect.TypeOf(i1).String())
	}
}

// SetZero z = 0
func (z *Element) SetZero() *Element {
	z[0] = 0
	z[1] = 0
	z[2] = 0
	z[3] = 0
	return z
}

// SetOne z = 1 (in Montgomery form, i.e. these words encode r mod q)
func (z *Element) SetOne() *Element {
	z[0] = 11387109765248188409
	z[1] = 10640745125853265911
	z[2] = 5455128044303689984
	z[3] = 1849268063235586341
	return z
}

// Div z = x*y^-1 mod q
func (z *Element) Div(x, y *Element) *Element {
	var yInv Element
	yInv.Inverse(y)
	z.Mul(x, &yInv)
	return z
}

// Bit returns the i'th bit, with lsb == bit 0.
// It is the responsibility of the caller to convert from Montgomery to Regular form if needed
func (z *Element) Bit(i uint64) uint64 {
	j := i / 64
	if j >= 4 {
		return 0
	}
	return uint64(z[j] >> (i % 64) & 1)
}

// Equal returns z == x
func (z *Element) Equal(x *Element) bool {
	return (z[3] == x[3]) && (z[2] == x[2]) && (z[1] == x[1]) && (z[0] == x[0])
}

// IsZero returns z == 0
func (z *Element) IsZero() bool {
	return (z[3] | z[2] | z[1] | z[0]) == 0
}

// IsUint64 reports whether z can be represented as an uint64.
func (z *Element) IsUint64() bool {
	return (z[3] | z[2] | z[1]) == 0
}

// Cmp compares (lexicographic order) z and x and returns:
//
// -1 if z < x
// 0 if z == x
// +1 if z > x
//
func (z *Element) Cmp(x *Element) int {
	// compare on regular (non-Montgomery) representations
	_z := *z
	_x := *x
	_z.FromMont()
	_x.FromMont()
	if _z[3] > _x[3] {
		return 1
	} else if _z[3] < _x[3] {
		return -1
	}
	if _z[2] > _x[2] {
		return 1
	} else if _z[2] < _x[2] {
		return -1
	}
	if _z[1] > _x[1] {
		return 1
	} else if _z[1] < _x[1] {
		return -1
	}
	if _z[0] > _x[0] {
		return 1
	} else if _z[0] < _x[0] {
		return -1
	}
	return 0
}

// LexicographicallyLargest returns true if this element is strictly lexicographically
// larger than its negation, false otherwise
func (z *Element) LexicographicallyLargest() bool {
	// adapted from github.com/zkcrypto/bls12_381
	// we check if the element is larger than (q-1) / 2
	// if z - (((q -1) / 2) + 1) have no underflow, then z > (q-1) / 2

	_z := *z
	_z.FromMont()

	// the constants below are the words of ((q-1)/2)+1
	var b uint64
	_, b = bits.Sub64(_z[0], 11045256207009841153, 0)
	_, b = bits.Sub64(_z[1], 17686690850434318336, b)
	_, b = bits.Sub64(_z[2], 14104218340464384127, b)
	_, b = bits.Sub64(_z[3], 1185534000748140376, b)

	return b == 0
}

// SetRandom sets z to a random element < q
// NOTE(review): reducing only the top word with `%=` makes the output
// distribution non-uniform mod q (modulo bias); acceptable only if callers
// don't require uniformity — confirm against upstream generator.
func (z *Element) SetRandom() (*Element, error) {
	var bytes [32]byte
	if _, err := io.ReadFull(rand.Reader, bytes[:]); err != nil {
		return nil, err
	}
	z[0] = binary.BigEndian.Uint64(bytes[0:8])
	z[1] = binary.BigEndian.Uint64(bytes[8:16])
	z[2] = binary.BigEndian.Uint64(bytes[16:24])
	z[3] = binary.BigEndian.Uint64(bytes[24:32])
	z[3] %= 2371068001496280753

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}

	return z, nil
}

// One returns 1 (in Montgomery form)
func One() Element {
	var one Element
	one.SetOne()
	return one
}

// Halve sets z to z / 2 (mod p)
func (z *Element) Halve() {
	if z[0]&1 == 1 {
		// z is odd: make it even by adding q (q is odd) so the shift below is exact.
		// No overflow: q[3] < 2⁶² so z+q fits in 256 bits.
		var carry uint64

		// z = z + q
		z[0], carry = bits.Add64(z[0], 3643768340310130689, 0)
		z[1], carry = bits.Add64(z[1], 16926637627159085057, carry)
		z[2], carry = bits.Add64(z[2], 9761692607219216639, carry)
		z[3], _ = bits.Add64(z[3], 2371068001496280753, carry)

	}

	// z = z >> 1

	z[0] = z[0]>>1 | z[1]<<63
	z[1] = z[1]>>1 | z[2]<<63
	z[2] = z[2]>>1 | z[3]<<63
	z[3] >>= 1

}

// API with assembly impl

// Mul z = x * y mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Mul(x, y *Element) *Element {
	mul(z, x, y)
	return z
}

// Square z = x * x mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Square(x *Element) *Element {
	mul(z, x, x)
	return z
}

// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation
// sets and returns z = z * 1
func (z *Element) FromMont() *Element {
	fromMont(z)
	return z
}

// Add z = x + y mod q
func (z *Element) Add(x, y *Element) *Element {
	add(z, x, y)
	return z
}

// Double z = x + x mod q, aka Lsh 1
func (z *Element) Double(x *Element) *Element {
	double(z, x)
	return z
}

// Sub z = x - y mod q
func (z *Element) Sub(x, y *Element) *Element {
	sub(z, x, y)
	return z
}

// Neg z = q - x
func (z *Element) Neg(x *Element) *Element {
	neg(z, x)
	return z
}

// Generic (no ADX instructions, no AMD64) versions of multiplication and squaring algorithms

// _mulGeneric z = x * y mod q (CIOS Montgomery multiplication; interleaves
// multiplication by y's words with reduction by q using qInvNeg = 3643768340310130687)
func _mulGeneric(z, x, y *Element) {

	var t [4]uint64
	var c [3]uint64
	{
		// round 0
		v := x[0]
		c[1], c[0] = bits.Mul64(v, y[0])
		m := c[0] * 3643768340310130687
		c[2] = madd0(m, 3643768340310130689, c[0])
		c[1], c[0] = madd1(v, y[1], c[1])
		c[2], t[0] = madd2(m, 16926637627159085057, c[2], c[0])
		c[1], c[0] = madd1(v, y[2], c[1])
		c[2], t[1] = madd2(m, 9761692607219216639, c[2], c[0])
		c[1], c[0] = madd1(v, y[3], c[1])
		t[3], t[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1])
	}
	{
		// round 1
		v := x[1]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 3643768340310130687
		c[2] = madd0(m, 3643768340310130689, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 16926637627159085057, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 9761692607219216639, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		t[3], t[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1])
	}
	{
		// round 2
		v := x[2]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 3643768340310130687
		c[2] = madd0(m, 3643768340310130689, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], t[0] = madd2(m, 16926637627159085057, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], t[1] = madd2(m, 9761692607219216639, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		t[3], t[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1])
	}
	{
		// round 3 (results written directly into z)
		v := x[3]
		c[1], c[0] = madd1(v, y[0], t[0])
		m := c[0] * 3643768340310130687
		c[2] = madd0(m, 3643768340310130689, c[0])
		c[1], c[0] = madd2(v, y[1], c[1], t[1])
		c[2], z[0] = madd2(m, 16926637627159085057, c[2], c[0])
		c[1], c[0] = madd2(v, y[2], c[1], t[2])
		c[2], z[1] = madd2(m, 9761692607219216639, c[2], c[0])
		c[1], c[0] = madd2(v, y[3], c[1], t[3])
		z[3], z[2] = madd3(m, 2371068001496280753, c[0], c[2], c[1])
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
}

// _mulWGeneric z = x * y mod q where y is a single (non-Montgomery) word
func _mulWGeneric(z, x *Element, y uint64) {

	var t [4]uint64
	{
		// round 0
		c1, c0 := bits.Mul64(y, x[0])
		m := c0 * 3643768340310130687
		c2 := madd0(m, 3643768340310130689, c0)
		c1, c0 = madd1(y, x[1], c1)
		c2, t[0] = madd2(m, 16926637627159085057, c2, c0)
		c1, c0 = madd1(y, x[2], c1)
		c2, t[1] = madd2(m, 9761692607219216639, c2, c0)
		c1, c0 = madd1(y, x[3], c1)
		t[3], t[2] = madd3(m, 2371068001496280753, c0, c2, c1)
	}
	{
		// round 1 (reduction-only: the remaining multiplier words are zero)
		m := t[0] * 3643768340310130687
		c2 := madd0(m, 3643768340310130689, t[0])
		c2, t[0] = madd2(m, 16926637627159085057, c2, t[1])
		c2, t[1] = madd2(m, 9761692607219216639, c2, t[2])
		t[3], t[2] = madd2(m, 2371068001496280753, t[3], c2)
	}
	{
		// round 2
		m := t[0] * 3643768340310130687
		c2 := madd0(m, 3643768340310130689, t[0])
		c2, t[0] = madd2(m, 16926637627159085057, c2, t[1])
		c2, t[1] = madd2(m, 9761692607219216639, c2, t[2])
		t[3], t[2] = madd2(m, 2371068001496280753, t[3], c2)
	}
	{
		// round 3
		m := t[0] * 3643768340310130687
		c2 := madd0(m, 3643768340310130689, t[0])
		c2, z[0] = madd2(m, 16926637627159085057, c2, t[1])
		c2, z[1] = madd2(m, 9761692607219216639, c2, t[2])
		z[3], z[2] = madd2(m, 2371068001496280753, t[3], c2)
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
}

// _fromMontGeneric converts z from Montgomery to regular form in place
func _fromMontGeneric(z *Element) {
	// the following lines implement z = z * 1
	// with a modified CIOS montgomery multiplication
	{
		// m = z[0]n'[0] mod W
		m := z[0] * 3643768340310130687
		C := madd0(m, 3643768340310130689, z[0])
		C, z[0] = madd2(m, 16926637627159085057, z[1], C)
		C, z[1] = madd2(m, 9761692607219216639, z[2], C)
		C, z[2] = madd2(m, 2371068001496280753, z[3], C)
		z[3] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * 3643768340310130687
		C := madd0(m, 3643768340310130689, z[0])
		C, z[0] = madd2(m, 16926637627159085057, z[1], C)
		C, z[1] = madd2(m, 9761692607219216639, z[2], C)
		C, z[2] = madd2(m, 2371068001496280753, z[3], C)
		z[3] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * 3643768340310130687
		C := madd0(m, 3643768340310130689, z[0])
		C, z[0] = madd2(m, 16926637627159085057, z[1], C)
		C, z[1] = madd2(m, 9761692607219216639, z[2], C)
		C, z[2] = madd2(m, 2371068001496280753, z[3], C)
		z[3] = C
	}
	{
		// m = z[0]n'[0] mod W
		m := z[0] * 3643768340310130687
		C := madd0(m, 3643768340310130689, z[0])
		C, z[0] = madd2(m, 16926637627159085057, z[1], C)
		C, z[1] = madd2(m, 9761692607219216639, z[2], C)
		C, z[2] = madd2(m, 2371068001496280753, z[3], C)
		z[3] = C
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
}

// _addGeneric z = x + y mod q
func _addGeneric(z, x, y *Element) {
	var carry uint64

	z[0], carry = bits.Add64(x[0], y[0], 0)
	z[1], carry = bits.Add64(x[1], y[1], carry)
	z[2], carry = bits.Add64(x[2], y[2], carry)
	z[3], _ = bits.Add64(x[3], y[3], carry)

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
}

// _doubleGeneric z = 2x mod q
func _doubleGeneric(z, x *Element) {
	var carry uint64

	z[0], carry = bits.Add64(x[0], x[0], 0)
	z[1], carry = bits.Add64(x[1], x[1], carry)
	z[2], carry = bits.Add64(x[2], x[2], carry)
	z[3], _ = bits.Add64(x[3], x[3], carry)

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
}

// _subGeneric z = x - y mod q (adds back q on borrow)
func _subGeneric(z, x, y *Element) {
	var b uint64
	z[0], b = bits.Sub64(x[0], y[0], 0)
	z[1], b = bits.Sub64(x[1], y[1], b)
	z[2], b = bits.Sub64(x[2], y[2], b)
	z[3], b = bits.Sub64(x[3], y[3], b)
	if b != 0 {
		var c uint64
		z[0], c = bits.Add64(z[0], 3643768340310130689, 0)
		z[1], c = bits.Add64(z[1], 16926637627159085057, c)
		z[2], c = bits.Add64(z[2], 9761692607219216639, c)
		z[3], _ = bits.Add64(z[3], 2371068001496280753, c)
	}
}

// _negGeneric z = q - x (and 0 stays 0)
func _negGeneric(z, x *Element) {
	if x.IsZero() {
		z.SetZero()
		return
	}
	var borrow uint64
	z[0], borrow = bits.Sub64(3643768340310130689, x[0], 0)
	z[1], borrow = bits.Sub64(16926637627159085057, x[1], borrow)
	z[2], borrow = bits.Sub64(9761692607219216639, x[2], borrow)
	z[3], _ = bits.Sub64(2371068001496280753, x[3], borrow)
}

// _reduceGeneric conditionally subtracts q once (assumes z < 2q)
func _reduceGeneric(z *Element) {

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
}

// mulByConstant z *= c, with fast paths for small constants (0,1,2,3,5)
func mulByConstant(z *Element, c uint8) {
	switch c {
	case 0:
		z.SetZero()
		return
	case 1:
		return
	case 2:
		z.Double(z)
		return
	case 3:
		_z := *z
		z.Double(z).Add(z, &_z)
	case 5:
		_z := *z
		z.Double(z).Double(z).Add(z, &_z)
	default:
		var y Element
		y.SetUint64(uint64(c))
		z.Mul(z, &y)
	}
}

// BatchInvert returns a new slice with every element inverted.
// Uses Montgomery batch inversion trick: one field inversion plus 3(n-1)
// multiplications; zero entries are skipped and invert to zero.
func BatchInvert(a []Element) []Element {
	res := make([]Element, len(a))
	if len(a) == 0 {
		return res
	}

	zeroes := make([]bool, len(a))
	accumulator := One()

	// forward pass: res[i] = product of a[0..i-1] (non-zero entries only)
	for i := 0; i < len(a); i++ {
		if a[i].IsZero() {
			zeroes[i] = true
			continue
		}
		res[i] = accumulator
		accumulator.Mul(&accumulator, &a[i])
	}

	accumulator.Inverse(&accumulator)

	// backward pass: peel off one factor per element
	for i := len(a) - 1; i >= 0; i-- {
		if zeroes[i] {
			continue
		}
		res[i].Mul(&res[i], &accumulator)
		accumulator.Mul(&accumulator, &a[i])
	}

	return res
}

// _butterflyGeneric sets a = a + b and b = a - b (FFT butterfly)
func _butterflyGeneric(a, b *Element) {
	t := *a
	a.Add(a, b)
	b.Sub(&t, b)
}

// BitLen returns the minimum number of bits needed to represent z
// returns 0 if z == 0
func (z *Element) BitLen() int {
	if z[3] != 0 {
		return 192 + bits.Len64(z[3])
	}
	if z[2] != 0 {
		return 128 + bits.Len64(z[2])
	}
	if z[1] != 0 {
		return 64 + bits.Len64(z[1])
	}
	return bits.Len64(z[0])
}

// Exp z = x^exponent mod q (square-and-multiply, MSB first; not constant time)
func (z *Element) Exp(x Element, exponent *big.Int) *Element {
	var bZero big.Int
	if exponent.Cmp(&bZero) == 0 {
		return z.SetOne()
	}

	z.Set(&x)

	for i := exponent.BitLen() - 2; i >= 0; i-- {
		z.Square(z)
		if exponent.Bit(i) == 1 {
			z.Mul(z, &x)
		}
	}

	return z
}

// ToMont converts z to Montgomery form
// sets and returns z = z * r²
func (z *Element) ToMont() *Element {
	return z.Mul(z, &rSquare)
}

// ToRegular returns z in regular form (doesn't mutate z)
func (z Element) ToRegular() Element {
	return *z.FromMont()
}

// String returns the decimal representation of z as generated by
// z.Text(10).
func (z *Element) String() string {
	return z.Text(10)
}

// Text returns the string representation of z in the given base.
// Base must be between 2 and 36, inclusive. The result uses the
// lower-case letters 'a' to 'z' for digit values 10 to 35.
// No prefix (such as "0x") is added to the string. If z is a nil
// pointer it returns "".
// If base == 10 and -z fits in a uint64 prefix "-" is added to the string.
func (z *Element) Text(base int) string {
	if base < 2 || base > 36 {
		panic("invalid base")
	}
	if z == nil {
		return ""
	}
	zz := *z
	zz.FromMont()
	if zz.IsUint64() {
		return strconv.FormatUint(zz[0], base)
	} else if base == 10 {
		var zzNeg Element
		zzNeg.Neg(z)
		zzNeg.FromMont()
		if zzNeg.IsUint64() {
			return "-" + strconv.FormatUint(zzNeg[0], base)
		}
	}
	vv := bigIntPool.Get().(*big.Int)
	r := zz.ToBigInt(vv).Text(base)
	bigIntPool.Put(vv)
	return r
}

// ToBigInt returns z as a big.Int in Montgomery form
func (z *Element) ToBigInt(res *big.Int) *big.Int {
	var b [Limbs * 8]byte
	binary.BigEndian.PutUint64(b[24:32], z[0])
	binary.BigEndian.PutUint64(b[16:24], z[1])
	binary.BigEndian.PutUint64(b[8:16], z[2])
	binary.BigEndian.PutUint64(b[0:8], z[3])

	return res.SetBytes(b[:])
}

// ToBigIntRegular returns z as a big.Int in regular form
func (z Element) ToBigIntRegular(res *big.Int) *big.Int {
	z.FromMont()
	return z.ToBigInt(res)
}

// Bytes returns the regular (non montgomery) value
// of z as a big-endian byte array.
func (z *Element) Bytes() (res [Limbs * 8]byte) {
	_z := z.ToRegular()
	binary.BigEndian.PutUint64(res[24:32], _z[0])
	binary.BigEndian.PutUint64(res[16:24], _z[1])
	binary.BigEndian.PutUint64(res[8:16], _z[2])
	binary.BigEndian.PutUint64(res[0:8], _z[3])

	return
}

// Marshal returns the regular (non montgomery) value
// of z as a big-endian byte slice.
func (z *Element) Marshal() []byte {
	b := z.Bytes()
	return b[:]
}

// SetBytes interprets e as the bytes of a big-endian unsigned integer,
// sets z to that value (in Montgomery form), and returns z.
func (z *Element) SetBytes(e []byte) *Element {
	// get a big int from our pool
	vv := bigIntPool.Get().(*big.Int)
	vv.SetBytes(e)

	// set big int
	z.SetBigInt(vv)

	// put temporary object back in pool
	bigIntPool.Put(vv)

	return z
}

// SetBigInt sets z to v (regular form) and returns z in Montgomery form
func (z *Element) SetBigInt(v *big.Int) *Element {
	z.SetZero()

	var zero big.Int

	// fast path
	c := v.Cmp(&_modulus)
	if c == 0 {
		// v == q → z = 0
		return z
	} else if c != 1 && v.Cmp(&zero) != -1 {
		// 0 ≤ v < q: no reduction needed
		return z.setBigInt(v)
	}

	// get temporary big int from the pool
	vv := bigIntPool.Get().(*big.Int)

	// copy input + modular reduction
	vv.Set(v)
	vv.Mod(v, &_modulus)

	// set big int byte value
	z.setBigInt(vv)

	// release object into pool
	bigIntPool.Put(vv)
	return z
}

// setBigInt assumes 0 ⩽ v < q
func (z *Element) setBigInt(v *big.Int) *Element {
	vBits := v.Bits()

	if bits.UintSize == 64 {
		for i := 0; i < len(vBits); i++ {
			z[i] = uint64(vBits[i])
		}
	} else {
		// 32-bit platforms: pack two big.Word values per limb
		for i := 0; i < len(vBits); i++ {
			if i%2 == 0 {
				z[i/2] = uint64(vBits[i])
			} else {
				z[i/2] |= uint64(vBits[i]) << 32
			}
		}
	}

	return z.ToMont()
}

// SetString creates a big.Int with number and calls SetBigInt on z
//
// The number prefix determines the actual base: A prefix of
// ''0b'' or ''0B'' selects base 2, ''0'', ''0o'' or ''0O'' selects base 8,
// and ''0x'' or ''0X'' selects base 16. Otherwise, the selected base is 10
// and no prefix is accepted.
//
// For base 16, lower and upper case letters are considered the same:
// The letters 'a' to 'f' and 'A' to 'F' represent digit values 10 to 15.
//
// An underscore character ''_'' may appear between a base
// prefix and an adjacent digit, and between successive digits; such
// underscores do not change the value of the number.
// Incorrect placement of underscores is reported as a panic if there
// are no other errors.
//
func (z *Element) SetString(number string) *Element {
	// get temporary big int from the pool
	vv := bigIntPool.Get().(*big.Int)

	if _, ok := vv.SetString(number, 0); !ok {
		panic("Element.SetString failed -> can't parse number into a big.Int " + number)
	}

	z.SetBigInt(vv)

	// release object into pool
	bigIntPool.Put(vv)

	return z
}

// MarshalJSON returns json encoding of z (z.Text(10))
// If z == nil, returns null
func (z *Element) MarshalJSON() ([]byte, error) {
	if z == nil {
		return []byte("null"), nil
	}
	const maxSafeBound = 15 // we encode it as number if it's small
	s := z.Text(10)
	if len(s) <= maxSafeBound {
		return []byte(s), nil
	}
	var sbb strings.Builder
	sbb.WriteByte('"')
	sbb.WriteString(s)
	sbb.WriteByte('"')
	return []byte(sbb.String()), nil
}

// UnmarshalJSON accepts numbers and strings as input
// See Element.SetString for valid prefixes (0x, 0b, ...)
func (z *Element) UnmarshalJSON(data []byte) error {
	s := string(data)
	if len(s) > Bits*3 {
		return errors.New("value too large (max = Element.Bits * 3)")
	}

	// we accept numbers and strings, remove leading and trailing quotes if any
	if len(s) > 0 && s[0] == '"' {
		s = s[1:]
	}
	if len(s) > 0 && s[len(s)-1] == '"' {
		s = s[:len(s)-1]
	}

	// get temporary big int from the pool
	vv := bigIntPool.Get().(*big.Int)

	if _, ok := vv.SetString(s, 0); !ok {
		return errors.New("can't parse into a big.Int: " + s)
	}

	z.SetBigInt(vv)

	// release object into pool
	bigIntPool.Put(vv)
	return nil
}

// Legendre returns the Legendre symbol of z (either +1, -1, or 0.)
func (z *Element) Legendre() int {
	var l Element
	// z^((q-1)/2)
	l.expByLegendreExp(*z)

	if l.IsZero() {
		return 0
	}

	// if l == 1 (the words below are 1 in Montgomery form; see SetOne)
	if (l[3] == 1849268063235586341) && (l[2] == 5455128044303689984) && (l[1] == 10640745125853265911) && (l[0] == 11387109765248188409) {
		return 1
	}
	return -1
}

// Sqrt z = √x mod q
// if the square root doesn't exist (x is not a square mod q)
// Sqrt leaves z unchanged and returns nil
func (z *Element) Sqrt(x *Element) *Element {
	// q ≡ 1 (mod 4)
	// see modSqrtTonelliShanks in math/big/int.go
	// using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf

	var y, b, t, w Element
	// w = x^((s-1)/2))
	w.expBySqrtExp(*x)

	// y = x^((s+1)/2)) = w * x
	y.Mul(x, &w)

	// b = x^s = w * w * x = y * x
	b.Mul(&w, &y)

	// g = nonResidue ^ s
	var g = Element{
		4558548184074722573,
		11721321436470045759,
		14707307855974552649,
		1565820507177503731,
	}
	r := uint64(42) // 2-adicity of q-1

	// compute legendre symbol
	// t = x^((q-1)/2) = r-1 squaring of x^s
	t = b
	for i := uint64(0); i < r-1; i++ {
		t.Square(&t)
	}
	if t.IsZero() {
		return z.SetZero()
	}
	if !((t[3] == 1849268063235586341) && (t[2] == 5455128044303689984) && (t[1] == 10640745125853265911) && (t[0] == 11387109765248188409)) {
		// t != 1, we don't have a square root
		return nil
	}
	for {
		var m uint64
		t = b

		// for t != 1
		for !((t[3] == 1849268063235586341) && (t[2] == 5455128044303689984) && (t[1] == 10640745125853265911) && (t[0] == 11387109765248188409)) {
			t.Square(&t)
			m++
		}

		if m == 0 {
			return z.Set(&y)
		}
		// t = g^(2^(r-m-1)) mod q
		ge := int(r - m - 1)
		t = g
		for ge > 0 {
			t.Square(&t)
			ge--
		}

		g.Square(&t)
		y.Mul(&y, &t)
		b.Mul(&b, &g)
		r = m
	}
}

// max returns the larger of a and b
func max(a int, b int) int {
	if a > b {
		return a
	}
	return b
}

// min returns the smaller of a and b
func min(a int, b int) int {
	if a < b {
		return a
	}
	return b
}

// updateFactorsConversionBias packs two biased 32-bit signed factors into one int64
const updateFactorsConversionBias int64 = 0x7fffffff7fffffff // (2³¹ - 1)(2³² + 1)
const updateFactorIdentityMatrixRow0 = 1
const updateFactorIdentityMatrixRow1 = 1 << 32

// updateFactorsDecompose unpacks the two signed 32-bit update factors from c
func updateFactorsDecompose(c int64) (int64, int64) {
	c += updateFactorsConversionBias
	const low32BitsFilter int64 = 0xFFFFFFFF
	f := c&low32BitsFilter - 0x7FFFFFFF
	g := c>>32&low32BitsFilter - 0x7FFFFFFF
	return f, g
}

const k = 32 // word size / 2
const signBitSelector = uint64(1) << 63
const approxLowBitsN = k - 1
const approxHighBitsN = k + 1
const inversionCorrectionFactorWord0 = 11496758646349758257
const inversionCorrectionFactorWord1 = 14106295395927053233
const inversionCorrectionFactorWord2 = 9675338311035607220
const inversionCorrectionFactorWord3 = 300574624876614870

const invIterationsN = 18

// Inverse z = x⁻¹ mod q
// Implements "Optimized Binary GCD for Modular Inversion"
// https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf
func (z *Element) Inverse(x *Element) *Element {
	if x.IsZero() {
		z.SetZero()
		return z
	}

	a := *x
	b := Element{
		qElementWord0,
		qElementWord1,
		qElementWord2,
		qElementWord3,
	} // b := q

	u := Element{1}

	// Update factors: we get [u; v]:= [f0 g0; f1 g1] [u; v]
	// c_i = f_i + 2³¹ - 1 + 2³² * (g_i + 2³¹ - 1)
	var c0, c1 int64

	// Saved update factors to reduce the number of field multiplications
	var pf0, pf1, pg0, pg1 int64

	var i uint

	var v, s Element

	// Since u,v are updated every other iteration, we must make sure we terminate after evenly many iterations
	// This also lets us get away with half as many updates to u,v
	// To make this constant-time-ish, replace the condition with i < invIterationsN
	for i = 0; i&1 == 1 || !a.IsZero(); i++ {
		n := max(a.BitLen(), b.BitLen())
		aApprox, bApprox := approximate(&a, n), approximate(&b, n)

		// After 0 iterations, we have f₀ ≤ 2⁰ and f₁ < 2⁰
		// f0, g0, f1, g1 = 1, 0, 0, 1
		c0, c1 = updateFactorIdentityMatrixRow0, updateFactorIdentityMatrixRow1

		for j := 0; j < approxLowBitsN; j++ {

			if aApprox&1 == 0 {
				aApprox /= 2
			} else {
				// NOTE: this uint64 s shadows the outer Element s
				s, borrow := bits.Sub64(aApprox, bApprox, 0)
				if borrow == 1 {
					s = bApprox - aApprox
					bApprox = aApprox
					c0, c1 = c1, c0
				}

				aApprox = s / 2
				c0 = c0 - c1

				// Now |f₀| < 2ʲ + 2ʲ = 2ʲ⁺¹
				// |f₁| ≤ 2ʲ still
			}

			c1 *= 2
			// |f₁| ≤ 2ʲ⁺¹
		}

		s = a

		var g0 int64
		// from this point on c0 aliases for f0
		c0, g0 = updateFactorsDecompose(c0)
		aHi := a.linearCombNonModular(&s, c0, &b, g0)
		if aHi&signBitSelector != 0 {
			// if aHi < 0
			c0, g0 = -c0, -g0
			aHi = a.neg(&a, aHi)
		}
		// right-shift a by k-1 bits
		a[0] = (a[0] >> approxLowBitsN) | ((a[1]) << approxHighBitsN)
		a[1] = (a[1] >> approxLowBitsN) | ((a[2]) << approxHighBitsN)
		a[2] = (a[2] >> approxLowBitsN) | ((a[3]) << approxHighBitsN)
		a[3] = (a[3] >> approxLowBitsN) | (aHi << approxHighBitsN)

		var f1 int64
		// from this point on c1 aliases for g0
		f1, c1 = updateFactorsDecompose(c1)
		bHi := b.linearCombNonModular(&s, f1, &b, c1)
		if bHi&signBitSelector != 0 {
			// if bHi < 0
			f1, c1 = -f1, -c1
			bHi = b.neg(&b, bHi)
		}
		// right-shift b by k-1 bits
		b[0] = (b[0] >> approxLowBitsN) | ((b[1]) << approxHighBitsN)
		b[1] = (b[1] >> approxLowBitsN) | ((b[2]) << approxHighBitsN)
		b[2] = (b[2] >> approxLowBitsN) | ((b[3]) << approxHighBitsN)
		b[3] = (b[3] >> approxLowBitsN) | (bHi << approxHighBitsN)

		if i&1 == 1 {
			// Combine current update factors with previously stored ones
			// [f₀, g₀; f₁, g₁] ← [f₀, g₀; f₁, g₁] [pf₀, pg₀; pf₁, pg₁]
			// We have |f₀|, |g₀|, |pf₀|, |pf₁| ≤ 2ᵏ⁻¹, and that |pf_i| < 2ᵏ⁻¹ for i ∈ {0, 1}
			// Then for the new value we get |f₀| < 2ᵏ⁻¹ × 2ᵏ⁻¹ + 2ᵏ⁻¹ × 2ᵏ⁻¹ = 2²ᵏ⁻¹
			// Which leaves us with an extra bit for the sign

			// c0 aliases f0, c1 aliases g1
			c0, g0, f1, c1 = c0*pf0+g0*pf1,
				c0*pg0+g0*pg1,
				f1*pf0+c1*pf1,
				f1*pg0+c1*pg1

			s = u
			u.linearCombSosSigned(&u, c0, &v, g0)
			v.linearCombSosSigned(&s, f1, &v, c1)

		} else {
			// Save update factors
			pf0, pg0, pf1, pg1 = c0, g0, f1, c1
		}
	}

	// For every iteration that we miss, v is not being multiplied by 2²ᵏ⁻²
	const pSq int64 = 1 << (2 * (k - 1))
	// If the function is constant-time ish, this loop will not run (probably no need to take it out explicitly)
	for ; i < invIterationsN; i += 2 {
		v.mulWSigned(&v, pSq)
	}

	z.Mul(&v, &Element{
		inversionCorrectionFactorWord0,
		inversionCorrectionFactorWord1,
		inversionCorrectionFactorWord2,
		inversionCorrectionFactorWord3,
	})
	return z
}

// approximate a big number x into a single 64 bit word using its uppermost and lowermost bits
// if x fits in a word as is, no approximation necessary
func approximate(x *Element, nBits int) uint64 {

	if nBits <= 64 {
		return x[0]
	}

	const mask = (uint64(1) << (k - 1)) - 1 // k-1 ones
	lo := mask & x[0]

	hiWordIndex := (nBits - 1) / 64

	hiWordBitsAvailable := nBits - hiWordIndex*64
	hiWordBitsUsed := min(hiWordBitsAvailable, approxHighBitsN)

	mask_ := uint64(^((1 << (hiWordBitsAvailable - hiWordBitsUsed)) - 1))
	hi := (x[hiWordIndex] & mask_) << (64 - hiWordBitsAvailable)

	mask_ = ^(1<<(approxLowBitsN+hiWordBitsUsed) - 1)
	mid := (mask_ & x[hiWordIndex-1]) >> hiWordBitsUsed

	return lo | mid | hi
}

// linearCombSosSigned z = xC·x + yC·y (mod q), with signed word coefficients
func (z *Element) linearCombSosSigned(x *Element, xC int64, y *Element, yC int64) {
	hi := z.linearCombNonModular(x, xC, y, yC)
	z.montReduceSigned(z, hi)
}

// montReduceSigned SOS algorithm; xHi must be at most 63 bits long.
// Last bit of xHi may be used as a sign bit
func (z *Element) montReduceSigned(x *Element, xHi uint64) {

	const signBitRemover = ^signBitSelector
	neg := xHi&signBitSelector != 0
	// the SOS implementation requires that most significant bit is 0
	// Let X be xHi*r + x
	// note that if X is negative we would have initially stored it as 2⁶⁴ r + X
	xHi &= signBitRemover
	// with this a negative X is now represented as 2⁶³ r + X

	var t [2*Limbs - 1]uint64
	var C uint64

	m := x[0] * qInvNegLsw

	// round 0 (t[0] is never needed again, so it is not stored)
	C = madd0(m, qElementWord0, x[0])
	C, t[1] = madd2(m, qElementWord1, x[1], C)
	C, t[2] = madd2(m, qElementWord2, x[2], C)
	C, t[3] = madd2(m, qElementWord3, x[3], C)

	// the high word of m * qElement[3] is at most 62 bits
	// x[3] + C is at most 65 bits (high word at most 1 bit)
	// Thus the resulting C will be at most 63 bits
	t[4] = xHi + C
	// xHi and C are 63 bits, therefore no overflow

	{
		const i = 1
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)

		t[i+Limbs] += C
	}
	{
		const i = 2
		m = t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, t[i+1] = madd2(m, qElementWord1, t[i+1], C)
		C, t[i+2] = madd2(m, qElementWord2, t[i+2], C)
		C, t[i+3] = madd2(m, qElementWord3, t[i+3], C)

		t[i+Limbs] += C
	}
	{
		const i = 3
		m := t[i] * qInvNegLsw

		C = madd0(m, qElementWord0, t[i+0])
		C, z[0] = madd2(m, qElementWord1, t[i+1], C)
		C, z[1] = madd2(m, qElementWord2, t[i+2], C)
		z[3], z[2] = madd2(m, qElementWord3, t[i+3], C)
	}

	// if z > q → z -= q
	// note: this is NOT constant time
	if !(z[3] < 2371068001496280753 || (z[3] == 2371068001496280753 && (z[2] < 9761692607219216639 || (z[2] == 9761692607219216639 && (z[1] < 16926637627159085057 || (z[1] == 16926637627159085057 && (z[0] < 3643768340310130689))))))) {
		var b uint64
		z[0], b = bits.Sub64(z[0], 3643768340310130689, 0)
		z[1], b = bits.Sub64(z[1], 16926637627159085057, b)
		z[2], b = bits.Sub64(z[2], 9761692607219216639, b)
		z[3], _ = bits.Sub64(z[3], 2371068001496280753, b)
	}
	if neg {
		// We have computed ( 2⁶³ r + X ) r⁻¹ = 2⁶³ + X r⁻¹ instead
		var b uint64
		z[0], b = bits.Sub64(z[0], signBitSelector, 0)
		z[1], b = bits.Sub64(z[1], 0, b)
		z[2], b = bits.Sub64(z[2], 0, b)
		z[3], b = bits.Sub64(z[3], 0, b)

		// Occurs iff x == 0 && xHi < 0, i.e. X = rX' for -2⁶³ ≤ X' < 0
		if b != 0 {
			// z[3] = -1
			// negative: add q
			const neg1 = 0xFFFFFFFFFFFFFFFF

			b = 0
			z[0], b = bits.Add64(z[0], qElementWord0, b)
			z[1], b = bits.Add64(z[1], qElementWord1, b)
			z[2], b = bits.Add64(z[2], qElementWord2, b)
			z[3], _ = bits.Add64(neg1, qElementWord3, b)
		}
	}
}

// mulWSigned mul word signed (w/ montgomery reduction)
func (z *Element) mulWSigned(x *Element, y int64) {
	m := y >> 63
	_mulWGeneric(z, x, uint64((y^m)-m))
	// multiply by abs(y)
	if y < 0 {
		z.Neg(z)
	}
}

// neg z = -x (two's complement over the 5-word value x·r + xHi);
// returns the negated high word
func (z *Element) neg(x *Element, xHi uint64) uint64 {
	var b uint64

	z[0], b = bits.Sub64(0, x[0], 0)
	z[1], b = bits.Sub64(0, x[1], b)
	z[2], b = bits.Sub64(0, x[2], b)
	z[3], b = bits.Sub64(0, x[3], b)
	xHi, _ = bits.Sub64(0, xHi, b)

	return xHi
}

// regular multiplication by one word regular (non montgomery)
// Fewer additions than the branch-free for positive y.
Could be faster on some architectures +func (z *Element) mulWRegular(x *Element, y int64) uint64 { + + // w := abs(y) + m := y >> 63 + w := uint64((y ^ m) - m) + + var c uint64 + c, z[0] = bits.Mul64(x[0], w) + c, z[1] = madd1(x[1], w, c) + c, z[2] = madd1(x[2], w, c) + c, z[3] = madd1(x[3], w, c) + + if y < 0 { + c = z.neg(z, c) + } + + return c +} + +/* +Removed: seems slower +// mulWRegular branch-free regular multiplication by one word (non montgomery) +func (z *Element) mulWRegularBf(x *Element, y int64) uint64 { + + w := uint64(y) + allNeg := uint64(y >> 63) // -1 if y < 0, 0 o.w + + // s[0], s[1] so results are not stored immediately in z. + // x[i] will be needed in the i+1 th iteration. We don't want to overwrite it in case x = z + var s [2]uint64 + var h [2]uint64 + + h[0], s[0] = bits.Mul64(x[0], w) + + c := uint64(0) + b := uint64(0) + + { + const curI = 1 % 2 + const prevI = 1 - curI + const iMinusOne = 1 - 1 + + h[curI], s[curI] = bits.Mul64(x[1], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 2 % 2 + const prevI = 1 - curI + const iMinusOne = 2 - 1 + + h[curI], s[curI] = bits.Mul64(x[2], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + + { + const curI = 3 % 2 + const prevI = 1 - curI + const iMinusOne = 3 - 1 + + h[curI], s[curI] = bits.Mul64(x[3], w) + s[curI], c = bits.Add64(s[curI], h[prevI], c) + s[curI], b = bits.Sub64(s[curI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + } + { + const curI = 4 % 2 + const prevI = 1 - curI + const iMinusOne = 3 + + s[curI], _ = bits.Sub64(h[prevI], allNeg & x[iMinusOne], b) + z[iMinusOne] = s[prevI] + + return s[curI] + c + } +}*/ + +// Requires NoCarry +func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int64) uint64 { + var yTimes Element + + yHi := 
yTimes.mulWRegular(y, yC) + xHi := z.mulWRegular(x, xC) + + carry := uint64(0) + z[0], carry = bits.Add64(z[0], yTimes[0], carry) + z[1], carry = bits.Add64(z[1], yTimes[1], carry) + z[2], carry = bits.Add64(z[2], yTimes[2], carry) + z[3], carry = bits.Add64(z[3], yTimes[3], carry) + + yHi, _ = bits.Add64(xHi, yHi, carry) + + return yHi +} diff --git a/ecc/bls12-378/fr/element_exp.go b/ecc/bls12-378/fr/element_exp.go new file mode 100644 index 000000000..372af0051 --- /dev/null +++ b/ecc/bls12-378/fr/element_exp.go @@ -0,0 +1,642 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// expBySqrtExp is equivalent to z.Exp(x, 41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expBySqrtExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _100 = 2*_10 + // _101 = 1 + _100 + // _1010 = 2*_101 + // _1111 = _101 + _1010 + // _10011 = _100 + _1111 + // _10100 = 1 + _10011 + // _11101 = _1010 + _10011 + // _101100 = _1111 + _11101 + // _1001001 = _11101 + _101100 + // _1001101 = _100 + _1001001 + // _1001111 = _10 + _1001101 + // _1010011 = _100 + _1001111 + // _1011100 = _1111 + _1001101 + // _10101011 = _1001111 + _1011100 + // _10111110 = _10011 + _10101011 + // _11001000 = _1010 + _10111110 + // i18 = 2*_11001000 + // i19 = _10101011 + i18 + // i20 = _1001001 + i19 + // i21 = i18 + i20 + // i22 = _1001101 + i21 + // i23 = _1010011 + i22 + // i24 = _1001001 + i23 + // i25 = i20 + i24 + // i26 = _1111 + i25 + // i27 = i19 + i26 + // i28 = i22 + i27 + // i29 = i24 + i28 + // i30 = _10111110 + i29 + // i31 = _101100 + i30 + // i32 = i25 + i31 + // i33 = i30 + i32 + // i34 = i28 + i33 + // i35 = _10100 + i34 + // i36 = i21 + i35 + // i37 = i32 + i36 + // i38 = i27 + i37 + // i39 = i31 + i38 + // i40 = i23 + i39 + // i41 = 2*i36 + // i42 = i38 + i40 + // i43 = _1011100 + i42 + // i92 = ((i41 << 16 + i42) << 14 + i33) << 17 + // i129 = ((i37 + i92) << 20 + i26 + i43) << 14 + // i168 = ((i34 + i129) << 17 + i35) << 19 + i40 + // i209 = ((i168 << 17 + i43) << 17 + i39) << 5 + // i248 = ((_101 + i209) << 30 + i29) << 6 + _101 + // return i248 << 3 + // + // Operations: 200 squares 51 multiplies + + // Allocate Temporaries. 
+ var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10 Element + // Step 1: t3 = x^0x2 + t3.Square(&x) + + // Step 2: t2 = x^0x4 + t2.Square(t3) + + // Step 3: z = x^0x5 + z.Mul(&x, t2) + + // Step 4: t9 = x^0xa + t9.Square(z) + + // Step 5: t6 = x^0xf + t6.Mul(z, t9) + + // Step 6: t8 = x^0x13 + t8.Mul(t2, t6) + + // Step 7: t4 = x^0x14 + t4.Mul(&x, t8) + + // Step 8: t0 = x^0x1d + t0.Mul(t9, t8) + + // Step 9: t1 = x^0x2c + t1.Mul(t6, t0) + + // Step 10: t0 = x^0x49 + t0.Mul(t0, t1) + + // Step 11: t5 = x^0x4d + t5.Mul(t2, t0) + + // Step 12: t7 = x^0x4f + t7.Mul(t3, t5) + + // Step 13: t3 = x^0x53 + t3.Mul(t2, t7) + + // Step 14: t2 = x^0x5c + t2.Mul(t6, t5) + + // Step 15: t7 = x^0xab + t7.Mul(t7, t2) + + // Step 16: t8 = x^0xbe + t8.Mul(t8, t7) + + // Step 17: t9 = x^0xc8 + t9.Mul(t9, t8) + + // Step 18: t10 = x^0x190 + t10.Square(t9) + + // Step 19: t9 = x^0x23b + t9.Mul(t7, t10) + + // Step 20: t7 = x^0x284 + t7.Mul(t0, t9) + + // Step 21: t10 = x^0x414 + t10.Mul(t10, t7) + + // Step 22: t5 = x^0x461 + t5.Mul(t5, t10) + + // Step 23: t3 = x^0x4b4 + t3.Mul(t3, t5) + + // Step 24: t0 = x^0x4fd + t0.Mul(t0, t3) + + // Step 25: t7 = x^0x781 + t7.Mul(t7, t0) + + // Step 26: t6 = x^0x790 + t6.Mul(t6, t7) + + // Step 27: t9 = x^0x9cb + t9.Mul(t9, t6) + + // Step 28: t5 = x^0xe2c + t5.Mul(t5, t9) + + // Step 29: t0 = x^0x1329 + t0.Mul(t0, t5) + + // Step 30: t8 = x^0x13e7 + t8.Mul(t8, t0) + + // Step 31: t1 = x^0x1413 + t1.Mul(t1, t8) + + // Step 32: t7 = x^0x1b94 + t7.Mul(t7, t1) + + // Step 33: t8 = x^0x2f7b + t8.Mul(t8, t7) + + // Step 34: t5 = x^0x3da7 + t5.Mul(t5, t8) + + // Step 35: t4 = x^0x3dbb + t4.Mul(t4, t5) + + // Step 36: t10 = x^0x41cf + t10.Mul(t10, t4) + + // Step 37: t7 = x^0x5d63 + t7.Mul(t7, t10) + + // Step 38: t9 = x^0x672e + 
t9.Mul(t9, t7) + + // Step 39: t1 = x^0x7b41 + t1.Mul(t1, t9) + + // Step 40: t3 = x^0x7ff5 + t3.Mul(t3, t1) + + // Step 41: t10 = x^0x839e + t10.Square(t10) + + // Step 42: t9 = x^0xe723 + t9.Mul(t9, t3) + + // Step 43: t2 = x^0xe77f + t2.Mul(t2, t9) + + // Step 59: t10 = x^0x839e0000 + for s := 0; s < 16; s++ { + t10.Square(t10) + } + + // Step 60: t9 = x^0x839ee723 + t9.Mul(t9, t10) + + // Step 74: t9 = x^0x20e7b9c8c000 + for s := 0; s < 14; s++ { + t9.Square(t9) + } + + // Step 75: t8 = x^0x20e7b9c8ef7b + t8.Mul(t8, t9) + + // Step 92: t8 = x^0x41cf7391def60000 + for s := 0; s < 17; s++ { + t8.Square(t8) + } + + // Step 93: t7 = x^0x41cf7391def65d63 + t7.Mul(t7, t8) + + // Step 113: t7 = x^0x41cf7391def65d6300000 + for s := 0; s < 20; s++ { + t7.Square(t7) + } + + // Step 114: t6 = x^0x41cf7391def65d6300790 + t6.Mul(t6, t7) + + // Step 115: t6 = x^0x41cf7391def65d630ef0f + t6.Mul(t2, t6) + + // Step 129: t6 = x^0x1073dce477bd9758c3bc3c000 + for s := 0; s < 14; s++ { + t6.Square(t6) + } + + // Step 130: t5 = x^0x1073dce477bd9758c3bc3fda7 + t5.Mul(t5, t6) + + // Step 147: t5 = x^0x20e7b9c8ef7b2eb187787fb4e0000 + for s := 0; s < 17; s++ { + t5.Square(t5) + } + + // Step 148: t4 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb + t4.Mul(t4, t5) + + // Step 167: t4 = x^0x1073dce477bd9758c3bc3fda71edd80000 + for s := 0; s < 19; s++ { + t4.Square(t4) + } + + // Step 168: t3 = x^0x1073dce477bd9758c3bc3fda71edd87ff5 + t3.Mul(t3, t4) + + // Step 185: t3 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffea0000 + for s := 0; s < 17; s++ { + t3.Square(t3) + } + + // Step 186: t2 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f + t2.Mul(t2, t3) + + // Step 203: t2 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe0000 + for s := 0; s < 17; s++ { + t2.Square(t2) + } + + // Step 204: t1 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe7b41 + t1.Mul(t1, t2) + + // Step 209: t1 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6820 + for s := 0; s < 5; s++ { + t1.Square(t1) + } + + // Step 210: t1 = 
x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6825 + t1.Mul(z, t1) + + // Step 240: t1 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940000000 + for s := 0; s < 30; s++ { + t1.Square(t1) + } + + // Step 241: t0 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940001329 + t0.Mul(t0, t1) + + // Step 247: t0 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca40 + for s := 0; s < 6; s++ { + t0.Square(t0) + } + + // Step 248: z = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca45 + z.Mul(z, t0) + + // Step 251: z = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 + for s := 0; s < 3; s++ { + z.Square(z) + } + + return z +} + +// expByLegendreExp is equivalent to z.Exp(x, 1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000) +// +// uses github.com/mmcloughlin/addchain v0.4.0 to generate a shorter addition chain +func (z *Element) expByLegendreExp(x Element) *Element { + // addition chain: + // + // _10 = 2*1 + // _100 = 2*_10 + // _101 = 1 + _100 + // _1010 = 2*_101 + // _1111 = _101 + _1010 + // _10011 = _100 + _1111 + // _10100 = 1 + _10011 + // _11101 = _1010 + _10011 + // _101100 = _1111 + _11101 + // _1001001 = _11101 + _101100 + // _1001101 = _100 + _1001001 + // _1001111 = _10 + _1001101 + // _1010001 = _10 + _1001111 + // _1010011 = _10 + _1010001 + // _1011100 = _1111 + _1001101 + // _10101011 = _1001111 + _1011100 + // _10111110 = _10011 + _10101011 + // _11001000 = _1010 + _10111110 + // i19 = 2*_11001000 + // i20 = _10101011 + i19 + // i21 = _1001001 + i20 + // i22 = i19 + i21 + // i23 = _1001101 + i22 + // i24 = _1010011 + i23 + // i25 = _1001001 + i24 + // i26 = i21 + i25 + // i27 = _1111 + i26 + // i28 = i20 + i27 + // i29 = i23 + i28 + // i30 = i25 + i29 + // i31 = _10111110 + i30 + // i32 = _101100 + i31 + // i33 = i26 + i32 + // i34 = i31 + i33 + // i35 = i29 + i34 + // i36 = _10100 + i35 + // i37 = i22 + i36 + // i38 = i33 + i37 + // i39 = i28 + i38 + // i40 = i32 + i39 + // i41 = i24 + i40 + // i42 = 2*i37 + // i43 = i39 
+ i41 + // i44 = _1011100 + i43 + // i93 = ((i42 << 16 + i43) << 14 + i34) << 17 + // i130 = ((i38 + i93) << 20 + i27 + i44) << 14 + // i169 = ((i35 + i130) << 17 + i36) << 19 + i41 + // i210 = ((i169 << 17 + i44) << 17 + i40) << 5 + // i253 = ((_101 + i210) << 30 + i30) << 10 + _1010001 + // return i253 << 41 + // + // Operations: 242 squares 52 multiplies + + // Allocate Temporaries. + var ( + t0 = new(Element) + t1 = new(Element) + t2 = new(Element) + t3 = new(Element) + t4 = new(Element) + t5 = new(Element) + t6 = new(Element) + t7 = new(Element) + t8 = new(Element) + t9 = new(Element) + t10 = new(Element) + t11 = new(Element) + ) + + // var t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11 Element + // Step 1: t3 = x^0x2 + t3.Square(&x) + + // Step 2: z = x^0x4 + z.Square(t3) + + // Step 3: t1 = x^0x5 + t1.Mul(&x, z) + + // Step 4: t10 = x^0xa + t10.Square(t1) + + // Step 5: t7 = x^0xf + t7.Mul(t1, t10) + + // Step 6: t9 = x^0x13 + t9.Mul(z, t7) + + // Step 7: t5 = x^0x14 + t5.Mul(&x, t9) + + // Step 8: t0 = x^0x1d + t0.Mul(t10, t9) + + // Step 9: t2 = x^0x2c + t2.Mul(t7, t0) + + // Step 10: t0 = x^0x49 + t0.Mul(t0, t2) + + // Step 11: t6 = x^0x4d + t6.Mul(z, t0) + + // Step 12: t8 = x^0x4f + t8.Mul(t3, t6) + + // Step 13: z = x^0x51 + z.Mul(t3, t8) + + // Step 14: t4 = x^0x53 + t4.Mul(t3, z) + + // Step 15: t3 = x^0x5c + t3.Mul(t7, t6) + + // Step 16: t8 = x^0xab + t8.Mul(t8, t3) + + // Step 17: t9 = x^0xbe + t9.Mul(t9, t8) + + // Step 18: t10 = x^0xc8 + t10.Mul(t10, t9) + + // Step 19: t11 = x^0x190 + t11.Square(t10) + + // Step 20: t10 = x^0x23b + t10.Mul(t8, t11) + + // Step 21: t8 = x^0x284 + t8.Mul(t0, t10) + + // Step 22: t11 = x^0x414 + t11.Mul(t11, t8) + + // Step 23: t6 = x^0x461 + t6.Mul(t6, t11) + + // Step 24: t4 = x^0x4b4 + t4.Mul(t4, t6) + + // Step 25: t0 = x^0x4fd + t0.Mul(t0, t4) + + // Step 26: t8 = x^0x781 + t8.Mul(t8, t0) + + // Step 27: t7 = x^0x790 + t7.Mul(t7, t8) + + // Step 28: t10 = x^0x9cb + t10.Mul(t10, t7) + + // Step 29: t6 = x^0xe2c + 
t6.Mul(t6, t10) + + // Step 30: t0 = x^0x1329 + t0.Mul(t0, t6) + + // Step 31: t9 = x^0x13e7 + t9.Mul(t9, t0) + + // Step 32: t2 = x^0x1413 + t2.Mul(t2, t9) + + // Step 33: t8 = x^0x1b94 + t8.Mul(t8, t2) + + // Step 34: t9 = x^0x2f7b + t9.Mul(t9, t8) + + // Step 35: t6 = x^0x3da7 + t6.Mul(t6, t9) + + // Step 36: t5 = x^0x3dbb + t5.Mul(t5, t6) + + // Step 37: t11 = x^0x41cf + t11.Mul(t11, t5) + + // Step 38: t8 = x^0x5d63 + t8.Mul(t8, t11) + + // Step 39: t10 = x^0x672e + t10.Mul(t10, t8) + + // Step 40: t2 = x^0x7b41 + t2.Mul(t2, t10) + + // Step 41: t4 = x^0x7ff5 + t4.Mul(t4, t2) + + // Step 42: t11 = x^0x839e + t11.Square(t11) + + // Step 43: t10 = x^0xe723 + t10.Mul(t10, t4) + + // Step 44: t3 = x^0xe77f + t3.Mul(t3, t10) + + // Step 60: t11 = x^0x839e0000 + for s := 0; s < 16; s++ { + t11.Square(t11) + } + + // Step 61: t10 = x^0x839ee723 + t10.Mul(t10, t11) + + // Step 75: t10 = x^0x20e7b9c8c000 + for s := 0; s < 14; s++ { + t10.Square(t10) + } + + // Step 76: t9 = x^0x20e7b9c8ef7b + t9.Mul(t9, t10) + + // Step 93: t9 = x^0x41cf7391def60000 + for s := 0; s < 17; s++ { + t9.Square(t9) + } + + // Step 94: t8 = x^0x41cf7391def65d63 + t8.Mul(t8, t9) + + // Step 114: t8 = x^0x41cf7391def65d6300000 + for s := 0; s < 20; s++ { + t8.Square(t8) + } + + // Step 115: t7 = x^0x41cf7391def65d6300790 + t7.Mul(t7, t8) + + // Step 116: t7 = x^0x41cf7391def65d630ef0f + t7.Mul(t3, t7) + + // Step 130: t7 = x^0x1073dce477bd9758c3bc3c000 + for s := 0; s < 14; s++ { + t7.Square(t7) + } + + // Step 131: t6 = x^0x1073dce477bd9758c3bc3fda7 + t6.Mul(t6, t7) + + // Step 148: t6 = x^0x20e7b9c8ef7b2eb187787fb4e0000 + for s := 0; s < 17; s++ { + t6.Square(t6) + } + + // Step 149: t5 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb + t5.Mul(t5, t6) + + // Step 168: t5 = x^0x1073dce477bd9758c3bc3fda71edd80000 + for s := 0; s < 19; s++ { + t5.Square(t5) + } + + // Step 169: t4 = x^0x1073dce477bd9758c3bc3fda71edd87ff5 + t4.Mul(t4, t5) + + // Step 186: t4 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffea0000 + for 
s := 0; s < 17; s++ { + t4.Square(t4) + } + + // Step 187: t3 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f + t3.Mul(t3, t4) + + // Step 204: t3 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe0000 + for s := 0; s < 17; s++ { + t3.Square(t3) + } + + // Step 205: t2 = x^0x41cf7391def65d630ef0ff69c7b761ffd5cefe7b41 + t2.Mul(t2, t3) + + // Step 210: t2 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6820 + for s := 0; s < 5; s++ { + t2.Square(t2) + } + + // Step 211: t1 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf6825 + t1.Mul(t1, t2) + + // Step 241: t1 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940000000 + for s := 0; s < 30; s++ { + t1.Square(t1) + } + + // Step 242: t0 = x^0x20e7b9c8ef7b2eb187787fb4e3dbb0ffeae77f3da0940001329 + t0.Mul(t0, t1) + + // Step 252: t0 = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca400 + for s := 0; s < 10; s++ { + t0.Square(t0) + } + + // Step 253: z = x^0x839ee723bdecbac61de1fed38f6ec3ffab9dfcf682500004ca451 + z.Mul(z, t0) + + // Step 294: z = x^0x1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 + for s := 0; s < 41; s++ { + z.Square(z) + } + + return z +} diff --git a/ecc/bls12-378/fr/element_fuzz.go b/ecc/bls12-378/fr/element_fuzz.go new file mode 100644 index 000000000..a4c87eb25 --- /dev/null +++ b/ecc/bls12-378/fr/element_fuzz.go @@ -0,0 +1,136 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "bytes" + "encoding/binary" + "io" + "math/big" + "math/bits" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +// Fuzz arithmetic operations fuzzer +func Fuzz(data []byte) int { + r := bytes.NewReader(data) + + var e1, e2 Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + // mul assembly + + var c, _c Element + a, _a, b, _b := e1, e1, e2, e2 + c.Mul(&a, &b) + _mulGeneric(&_c, &_a, &_b) + + if !c.Equal(&_c) { + panic("mul asm != mul generic on Element") + } + } + + { + // inverse + inv := e1 + inv.Inverse(&inv) + + var bInv, b1, b2 big.Int + e1.ToBigIntRegular(&b1) + bInv.ModInverse(&b1, Modulus()) + inv.ToBigIntRegular(&b2) + + if b2.Cmp(&bInv) != 0 { + panic("inverse operation doesn't match big int result") + } + } + + { + // a + -a == 0 + a, b := e1, e1 + b.Neg(&b) + a.Add(&a, &b) + if !a.IsZero() { + panic("a + -a != 0") + } + } + + return fuzzNormal + +} + +// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader +// and interpret it as big endian uint64 +// used for fuzzing purposes only +func (z *Element) SetRawBytes(r io.Reader) { + + buf := make([]byte, 8) + + for i := 0; i < len(z); i++ { + if _, err := io.ReadFull(r, buf); err != nil { + goto eof + } + z[i] = binary.BigEndian.Uint64(buf[:]) + } +eof: + z[3] %= qElement[3] + + if z.BiggerModulus() { + var b uint64 + z[0], b = bits.Sub64(z[0], qElement[0], 0) + z[1], b = bits.Sub64(z[1], qElement[1], b) + z[2], b = bits.Sub64(z[2], qElement[2], b) + z[3], b = bits.Sub64(z[3], qElement[3], b) + } + + return +} + +func (z *Element) BiggerModulus() bool { + if z[3] > qElement[3] { + return true + } + if z[3] < qElement[3] { + return false + } + + if z[2] > qElement[2] { + return true + } + if z[2] < qElement[2] { + return false + } + + if z[1] > qElement[1] { + return true + } + if z[1] < qElement[1] { + return false + } + + return z[0] >= qElement[0] +} diff --git 
a/ecc/bls12-378/fr/element_mul_adx_amd64.s b/ecc/bls12-378/fr/element_mul_adx_amd64.s new file mode 100644 index 000000000..35a9c7b30 --- /dev/null +++ b/ecc/bls12-378/fr/element_mul_adx_amd64.s @@ -0,0 +1,466 @@ +// +build amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x3291440000000001 +DATA q<>+8(SB)/8, $0xeae77f3da0940001 +DATA q<>+16(SB)/8, $0x87787fb4e3dbb0ff +DATA q<>+24(SB)/8, $0x20e7b9c8ef7b2eb1 +GLOBL q<>(SB), (RODATA+NOPTR), $32 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x329143ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), NOSPLIT, $0-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + MOVQ 
x+8(FP), SI + + // x[0] -> DI + // x[1] -> R8 + // x[2] -> R9 + // x[3] -> R10 + MOVQ 0(SI), DI + MOVQ 8(SI), R8 + MOVQ 16(SI), R9 + MOVQ 24(SI), R10 + MOVQ y+16(FP), R11 + + // A -> BP + // t[0] -> R14 + // t[1] -> R13 + // t[2] -> CX + // t[3] -> BX + // clear the flags + XORQ AX, AX + MOVQ 0(R11), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ DI, R14, R13 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R8, AX, CX + ADOXQ AX, R13 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R9, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 8(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, 
R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 16(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 24(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + 
ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET + +TEXT ·fromMont(SB), NOSPLIT, $0-8 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R13 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), 
DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET diff --git a/ecc/bls12-378/fr/element_mul_amd64.s b/ecc/bls12-378/fr/element_mul_amd64.s new file mode 100644 index 000000000..850f72813 --- /dev/null +++ b/ecc/bls12-378/fr/element_mul_amd64.s @@ -0,0 +1,488 @@ +// +build !amd64_adx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x3291440000000001 +DATA q<>+8(SB)/8, $0xeae77f3da0940001 +DATA q<>+16(SB)/8, $0x87787fb4e3dbb0ff +DATA q<>+24(SB)/8, $0x20e7b9c8ef7b2eb1 +GLOBL q<>(SB), (RODATA+NOPTR), $32 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x329143ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + +// mul(res, x, y *Element) +TEXT ·mul(SB), $24-24 + + // the algorithm is described here + // https://hackmd.io/@zkteam/modular_multiplication + // however, to benefit from the ADCX and ADOX carry chains + // we split the inner loops in 2: + // for i=0 to N-1 + // for j=0 to N-1 + // (A,t[j]) := t[j] + x[j]*y[i] + A + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + A + + NO_LOCAL_POINTERS + CMPB ·supportAdx(SB), $1 + JNE l1 + MOVQ x+8(FP), SI + + // x[0] -> DI + // x[1] -> R8 + // x[2] -> R9 + // x[3] -> R10 + MOVQ 0(SI), DI + MOVQ 8(SI), R8 + MOVQ 16(SI), R9 + MOVQ 24(SI), R10 + MOVQ y+16(FP), R11 + + // A -> BP + // t[0] -> R14 + // t[1] -> R13 + // t[2] -> CX + // t[3] -> BX + // clear the flags + XORQ AX, AX + MOVQ 0(R11), DX + + // (A,t[0]) := x[0]*y[0] + A + MULXQ DI, R14, R13 + + // (A,t[1]) := x[1]*y[0] + A + MULXQ R8, AX, CX + ADOXQ AX, R13 + + // (A,t[2]) := x[2]*y[0] + A + MULXQ R9, AX, BX + ADOXQ AX, CX + + // (A,t[3]) := x[3]*y[0] + A + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, 
DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 8(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[1] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[1] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[1] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[1] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 16(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[2] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[2] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[2] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[2] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // 
m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // clear the flags + XORQ AX, AX + MOVQ 24(R11), DX + + // (A,t[0]) := t[0] + x[0]*y[3] + A + MULXQ DI, AX, BP + ADOXQ AX, R14 + + // (A,t[1]) := t[1] + x[1]*y[3] + A + ADCXQ BP, R13 + MULXQ R8, AX, BP + ADOXQ AX, R13 + + // (A,t[2]) := t[2] + x[2]*y[3] + A + ADCXQ BP, CX + MULXQ R9, AX, BP + ADOXQ AX, CX + + // (A,t[3]) := t[3] + x[3]*y[3] + A + ADCXQ BP, BX + MULXQ R10, AX, BP + ADOXQ AX, BX + + // A += carries from ADCXQ and ADOXQ + MOVQ $0, AX + ADCXQ AX, BP + ADOXQ AX, BP + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + + // clear the flags + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, R12 + ADCXQ R14, AX + MOVQ R12, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + + // t[3] = C + A + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ BP, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,R12,R11,DI) + REDUCE(R14,R13,CX,BX,SI,R12,R11,DI) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET + +l1: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + MOVQ x+8(FP), AX + MOVQ AX, 8(SP) + MOVQ y+16(FP), AX + MOVQ AX, 16(SP) + CALL ·_mulGeneric(SB) + RET + +TEXT ·fromMont(SB), $8-8 + NO_LOCAL_POINTERS + + // the algorithm is 
described here + // https://hackmd.io/@zkteam/modular_multiplication + // when y = 1 we have: + // for i=0 to N-1 + // t[i] = x[i] + // for i=0 to N-1 + // m := t[0]*q'[0] mod W + // C,_ := t[0] + m*q[0] + // for j=1 to N-1 + // (C,t[j-1]) := t[j] + m*q[j] + C + // t[N-1] = C + CMPB ·supportAdx(SB), $1 + JNE l2 + MOVQ res+0(FP), DX + MOVQ 0(DX), R14 + MOVQ 8(DX), R13 + MOVQ 16(DX), CX + MOVQ 24(DX), BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + XORQ DX, DX + + // m := t[0]*q'[0] mod W + MOVQ 
qInv0<>(SB), DX + IMULQ R14, DX + XORQ AX, AX + + // C,_ := t[0] + m*q[0] + MULXQ q<>+0(SB), AX, BP + ADCXQ R14, AX + MOVQ BP, R14 + + // (C,t[0]) := t[1] + m*q[1] + C + ADCXQ R13, R14 + MULXQ q<>+8(SB), AX, R13 + ADOXQ AX, R14 + + // (C,t[1]) := t[2] + m*q[2] + C + ADCXQ CX, R13 + MULXQ q<>+16(SB), AX, CX + ADOXQ AX, R13 + + // (C,t[2]) := t[3] + m*q[3] + C + ADCXQ BX, CX + MULXQ q<>+24(SB), AX, BX + ADOXQ AX, CX + MOVQ $0, AX + ADCXQ AX, BX + ADOXQ AX, BX + + // reduce element(R14,R13,CX,BX) using temp registers (SI,DI,R8,R9) + REDUCE(R14,R13,CX,BX,SI,DI,R8,R9) + + MOVQ res+0(FP), AX + MOVQ R14, 0(AX) + MOVQ R13, 8(AX) + MOVQ CX, 16(AX) + MOVQ BX, 24(AX) + RET + +l2: + MOVQ res+0(FP), AX + MOVQ AX, (SP) + CALL ·_fromMontGeneric(SB) + RET diff --git a/ecc/bls12-378/fr/element_ops_amd64.go b/ecc/bls12-378/fr/element_ops_amd64.go new file mode 100644 index 000000000..78022b3e6 --- /dev/null +++ b/ecc/bls12-378/fr/element_ops_amd64.go @@ -0,0 +1,50 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +//go:noescape +func MulBy3(x *Element) + +//go:noescape +func MulBy5(x *Element) + +//go:noescape +func MulBy13(x *Element) + +//go:noescape +func add(res, x, y *Element) + +//go:noescape +func sub(res, x, y *Element) + +//go:noescape +func neg(res, x *Element) + +//go:noescape +func double(res, x *Element) + +//go:noescape +func mul(res, x, y *Element) + +//go:noescape +func fromMont(res *Element) + +//go:noescape +func reduce(res *Element) + +//go:noescape +func Butterfly(a, b *Element) diff --git a/ecc/bls12-378/fr/element_ops_amd64.s b/ecc/bls12-378/fr/element_ops_amd64.s new file mode 100644 index 000000000..a8d8f4ca4 --- /dev/null +++ b/ecc/bls12-378/fr/element_ops_amd64.s @@ -0,0 +1,340 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x3291440000000001 +DATA q<>+8(SB)/8, $0xeae77f3da0940001 +DATA q<>+16(SB)/8, $0x87787fb4e3dbb0ff +DATA q<>+24(SB)/8, $0x20e7b9c8ef7b2eb1 +GLOBL q<>(SB), (RODATA+NOPTR), $32 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x329143ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + +// add(res, x, y *Element) +TEXT ·add(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ y+16(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + + // reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) + + MOVQ res+0(FP), R12 + MOVQ CX, 0(R12) + MOVQ BX, 8(R12) + MOVQ SI, 16(R12) + MOVQ DI, 24(R12) + RET + +// sub(res, x, y *Element) +TEXT ·sub(SB), NOSPLIT, $0-24 + XORQ DI, DI + MOVQ x+8(FP), SI + MOVQ 0(SI), AX + MOVQ 8(SI), DX + MOVQ 16(SI), CX + MOVQ 24(SI), BX + MOVQ y+16(FP), SI + SUBQ 0(SI), AX + SBBQ 8(SI), DX + SBBQ 16(SI), CX + SBBQ 24(SI), BX + MOVQ $0x3291440000000001, R8 + MOVQ $0xeae77f3da0940001, R9 + MOVQ $0x87787fb4e3dbb0ff, R10 + MOVQ $0x20e7b9c8ef7b2eb1, R11 + CMOVQCC DI, R8 + CMOVQCC DI, R9 + CMOVQCC DI, R10 + CMOVQCC DI, R11 + ADDQ R8, AX + ADCQ R9, DX + ADCQ R10, CX + ADCQ R11, BX + MOVQ res+0(FP), R12 + MOVQ AX, 0(R12) + MOVQ DX, 8(R12) + MOVQ CX, 16(R12) + MOVQ BX, 24(R12) + RET + +// double(res, x *Element) +TEXT ·double(SB), NOSPLIT, $0-16 + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp 
registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ res+0(FP), R11 + MOVQ DX, 0(R11) + MOVQ CX, 8(R11) + MOVQ BX, 16(R11) + MOVQ SI, 24(R11) + RET + +// neg(res, x *Element) +TEXT ·neg(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), DI + MOVQ x+8(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + MOVQ DX, AX + ORQ CX, AX + ORQ BX, AX + ORQ SI, AX + TESTQ AX, AX + JEQ l1 + MOVQ $0x3291440000000001, R8 + SUBQ DX, R8 + MOVQ R8, 0(DI) + MOVQ $0xeae77f3da0940001, R8 + SBBQ CX, R8 + MOVQ R8, 8(DI) + MOVQ $0x87787fb4e3dbb0ff, R8 + SBBQ BX, R8 + MOVQ R8, 16(DI) + MOVQ $0x20e7b9c8ef7b2eb1, R8 + SBBQ SI, R8 + MOVQ R8, 24(DI) + RET + +l1: + MOVQ AX, 0(DI) + MOVQ AX, 8(DI) + MOVQ AX, 16(DI) + MOVQ AX, 24(DI) + RET + +TEXT ·reduce(SB), NOSPLIT, $0-8 + MOVQ res+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// MulBy3(x *Element) +TEXT ·MulBy3(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// MulBy5(x *Element) +TEXT ·MulBy5(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + 
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (R15,DI,R8,R9) + REDUCE(DX,CX,BX,SI,R15,DI,R8,R9) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// MulBy13(x *Element) +TEXT ·MulBy13(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), AX + MOVQ 0(AX), DX + MOVQ 8(AX), CX + MOVQ 16(AX), BX + MOVQ 24(AX), SI + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) + REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) + + MOVQ DX, R11 + MOVQ CX, R12 + MOVQ BX, R13 + MOVQ SI, R14 + ADDQ DX, DX + ADCQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + ADDQ 0(AX), DX + ADCQ 8(AX), CX + ADCQ 16(AX), BX + ADCQ 24(AX), SI + + // reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) + REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) + + MOVQ DX, 0(AX) + MOVQ CX, 8(AX) + MOVQ BX, 16(AX) + MOVQ SI, 24(AX) + RET + +// Butterfly(a, b *Element) sets a = a + b; b = a - b +TEXT ·Butterfly(SB), NOSPLIT, $0-16 + MOVQ a+0(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ CX, R8 + MOVQ BX, R9 + MOVQ SI, R10 + MOVQ DI, R11 + XORQ AX, AX + MOVQ b+8(FP), DX + ADDQ 0(DX), CX + ADCQ 8(DX), BX + ADCQ 16(DX), SI + ADCQ 24(DX), DI + SUBQ 0(DX), R8 + SBBQ 8(DX), R9 + SBBQ 16(DX), R10 + SBBQ 24(DX), R11 + MOVQ $0x3291440000000001, R12 + MOVQ $0xeae77f3da0940001, R13 + MOVQ 
$0x87787fb4e3dbb0ff, R14 + MOVQ $0x20e7b9c8ef7b2eb1, R15 + CMOVQCC AX, R12 + CMOVQCC AX, R13 + CMOVQCC AX, R14 + CMOVQCC AX, R15 + ADDQ R12, R8 + ADCQ R13, R9 + ADCQ R14, R10 + ADCQ R15, R11 + MOVQ R8, 0(DX) + MOVQ R9, 8(DX) + MOVQ R10, 16(DX) + MOVQ R11, 24(DX) + + // reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) + + MOVQ a+0(FP), AX + MOVQ CX, 0(AX) + MOVQ BX, 8(AX) + MOVQ SI, 16(AX) + MOVQ DI, 24(AX) + RET diff --git a/ecc/bls12-378/fr/element_ops_noasm.go b/ecc/bls12-378/fr/element_ops_noasm.go new file mode 100644 index 000000000..ec1fac18d --- /dev/null +++ b/ecc/bls12-378/fr/element_ops_noasm.go @@ -0,0 +1,78 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +// /!\ WARNING /!\ +// this code has not been audited and is provided as-is. 
In particular, +// there is no security guarantees such as constant time implementation +// or side-channel attack resistance +// /!\ WARNING /!\ + +// MulBy3 x *= 3 +func MulBy3(x *Element) { + mulByConstant(x, 3) +} + +// MulBy5 x *= 5 +func MulBy5(x *Element) { + mulByConstant(x, 5) +} + +// MulBy13 x *= 13 +func MulBy13(x *Element) { + mulByConstant(x, 13) +} + +// Butterfly sets +// a = a + b +// b = a - b +func Butterfly(a, b *Element) { + _butterflyGeneric(a, b) +} + +func mul(z, x, y *Element) { + _mulGeneric(z, x, y) +} + +// FromMont converts z in place (i.e. mutates) from Montgomery to regular representation +// sets and returns z = z * 1 +func fromMont(z *Element) { + _fromMontGeneric(z) +} + +func add(z, x, y *Element) { + _addGeneric(z, x, y) +} + +func double(z, x *Element) { + _doubleGeneric(z, x) +} + +func sub(z, x, y *Element) { + _subGeneric(z, x, y) +} + +func neg(z, x *Element) { + _negGeneric(z, x) +} + +func reduce(z *Element) { + _reduceGeneric(z) +} diff --git a/ecc/bls12-378/fr/element_test.go b/ecc/bls12-378/fr/element_test.go new file mode 100644 index 000000000..34311fe7b --- /dev/null +++ b/ecc/bls12-378/fr/element_test.go @@ -0,0 +1,2649 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fr + +import ( + "crypto/rand" + "encoding/json" + "fmt" + "math/big" + "math/bits" + mrand "math/rand" + "testing" + + "github.com/leanovate/gopter" + ggen "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" + + "github.com/stretchr/testify/require" +) + +// ------------------------------------------------------------------------------------------------- +// benchmarks +// most benchmarks are rudimentary and should sample a large number of random inputs +// or be run multiple times to ensure it didn't measure the fastest path of the function + +var benchResElement Element + +func BenchmarkElementSetBytes(b *testing.B) { + var x Element + x.SetRandom() + bb := x.Bytes() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.SetBytes(bb[:]) + } + +} + +func BenchmarkElementMulByConstants(b *testing.B) { + b.Run("mulBy3", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy3(&benchResElement) + } + }) + b.Run("mulBy5", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy5(&benchResElement) + } + }) + b.Run("mulBy13", func(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + MulBy13(&benchResElement) + } + }) +} + +func BenchmarkElementInverse(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + benchResElement.Inverse(&x) + } + +} + +func BenchmarkElementButterfly(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + Butterfly(&x, &benchResElement) + } +} + +func BenchmarkElementExp(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b1, _ := rand.Int(rand.Reader, Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Exp(x, b1) + } +} 
+ +func BenchmarkElementDouble(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Double(&benchResElement) + } +} + +func BenchmarkElementAdd(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Add(&x, &benchResElement) + } +} + +func BenchmarkElementSub(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sub(&x, &benchResElement) + } +} + +func BenchmarkElementNeg(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Neg(&benchResElement) + } +} + +func BenchmarkElementDiv(b *testing.B) { + var x Element + x.SetRandom() + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Div(&x, &benchResElement) + } +} + +func BenchmarkElementFromMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.FromMont() + } +} + +func BenchmarkElementToMont(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.ToMont() + } +} +func BenchmarkElementSquare(b *testing.B) { + benchResElement.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Square(&benchResElement) + } +} + +func BenchmarkElementSqrt(b *testing.B) { + var a Element + a.SetUint64(4) + a.Neg(&a) + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Sqrt(&a) + } +} + +func BenchmarkElementMul(b *testing.B) { + x := Element{ + 1260465344847950704, + 15627634503313390135, + 1085346480195626314, + 405261321576397495, + } + benchResElement.SetOne() + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Mul(&benchResElement, &x) + } +} + +func BenchmarkElementCmp(b *testing.B) { + x := Element{ + 1260465344847950704, + 15627634503313390135, + 
1085346480195626314, + 405261321576397495, + } + benchResElement = x + benchResElement[0] = 0 + b.ResetTimer() + for i := 0; i < b.N; i++ { + benchResElement.Cmp(&x) + } +} + +func TestElementCmp(t *testing.T) { + var x, y Element + + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + one := One() + y.Sub(&y, &one) + + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } + + x = y + if x.Cmp(&y) != 0 { + t.Fatal("x == y") + } + + x.Sub(&x, &one) + if x.Cmp(&y) != -1 { + t.Fatal("x < y") + } + if y.Cmp(&x) != 1 { + t.Fatal("x < y") + } +} + +func TestElementIsRandom(t *testing.T) { + for i := 0; i < 50; i++ { + var x, y Element + x.SetRandom() + y.SetRandom() + if x.Equal(&y) { + t.Fatal("2 random numbers are unlikely to be equal") + } + } +} + +// ------------------------------------------------------------------------------------------------- +// Gopter tests +// most of them are generated with a template + +const ( + nbFuzzShort = 200 + nbFuzz = 1000 +) + +// special values to be used in tests +var staticTestValues []Element + +func init() { + staticTestValues = append(staticTestValues, Element{}) // zero + staticTestValues = append(staticTestValues, One()) // one + staticTestValues = append(staticTestValues, rSquare) // r² + var e, one Element + one.SetOne() + e.Sub(&qElement, &one) + staticTestValues = append(staticTestValues, e) // q - 1 + e.Double(&one) + staticTestValues = append(staticTestValues, e) // 2 + + { + a := qElement + a[3]-- + staticTestValues = append(staticTestValues, a) + } + { + a := qElement + a[0]-- + staticTestValues = append(staticTestValues, a) + } + + for i := 0; i <= 3; i++ { + staticTestValues = append(staticTestValues, Element{uint64(i)}) + staticTestValues = append(staticTestValues, Element{0, uint64(i)}) + } + + { + a := qElement + a[3]-- + a[0]++ + staticTestValues = append(staticTestValues, a) + } + +} + +func TestElementNegZero(t *testing.T) { + var a, b Element + b.SetZero() + for a.IsZero() { + 
a.SetRandom() + } + a.Neg(&b) + if !a.IsZero() { + t.Fatal("neg(0) != 0") + } +} + +func TestElementReduce(t *testing.T) { + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, s := range testValues { + expected := s + reduce(&s) + _reduceGeneric(&expected) + if !s.Equal(&expected) { + t.Fatal("reduce failed: asm and generic impl don't match") + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := genFull() + + properties.Property("reduce should output a result smaller than modulus", prop.ForAll( + func(a Element) bool { + b := a + reduce(&a) + _reduceGeneric(&b) + return !a.biggerOrEqualModulus() && a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementBytes(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("SetBytes(Bytes()) should stayt constant", prop.ForAll( + func(a testPairElement) bool { + var b Element + bytes := a.element.Bytes() + b.SetBytes(bytes[:]) + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementInverseExp(t *testing.T) { + // inverse must be equal to exp^-2 + exp := Modulus() + exp.Sub(exp, new(big.Int).SetUint64(2)) + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else 
{ + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("inv == exp^-2", prop.ForAll( + func(a testPairElement) bool { + var b Element + b.Set(&a.element) + a.element.Inverse(&a.element) + b.Exp(b, exp) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestElementMulByConstants(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + implemented := []uint8{0, 1, 2, 3, 5, 13} + properties.Property("mulByConstant", prop.ForAll( + func(a testPairElement) bool { + for _, c := range implemented { + var constant Element + constant.SetUint64(uint64(c)) + + b := a.element + b.Mul(&b, &constant) + + aa := a.element + mulByConstant(&aa, c) + + if !aa.Equal(&b) { + return false + } + } + + return true + }, + genA, + )) + + properties.Property("MulBy3(x) == Mul(x, 3)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(3) + + b := a.element + b.Mul(&b, &constant) + + MulBy3(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy5(x) == Mul(x, 5)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(5) + + b := a.element + b.Mul(&b, &constant) + + MulBy5(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("MulBy13(x) == Mul(x, 13)", prop.ForAll( + func(a testPairElement) bool { + var constant Element + constant.SetUint64(13) + + b := a.element + b.Mul(&b, &constant) + + 
MulBy13(&a.element) + + return a.element.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementLegendre(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("legendre should output same result than big.Int.Jacobi", prop.ForAll( + func(a testPairElement) bool { + return a.element.Legendre() == big.Jacobi(&a.bigint, Modulus()) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementButterflies(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("butterfly0 == a -b; a +b", prop.ForAll( + func(a, b testPairElement) bool { + a0, b0 := a.element, b.element + + _butterflyGeneric(&a.element, &b.element) + Butterfly(&a0, &b0) + + return a.element.Equal(&a0) && b.element.Equal(&b0) + }, + genA, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func 
TestElementLexicographicallyLargest(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("element.Cmp should match LexicographicallyLargest output", prop.ForAll( + func(a testPairElement) bool { + var negA Element + negA.Neg(&a.element) + + cmpResult := a.element.Cmp(&negA) + lResult := a.element.LexicographicallyLargest() + + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult != 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } + +} + +func TestElementAdd(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Add: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Add(&a.element, &b.element) + a.element.Add(&a.element, &b.element) + b.element.Add(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Add: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Add(&a.element, &b.element) + + var d, e big.Int + d.Add(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed 
elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Add(&a.element, &r) + d.Add(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _addGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. + return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Add: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Add(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Add: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Add(&a.element, &b.element) + _addGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Add(&a, &b) + d.Add(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _addGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Add failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Add failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, 
test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSub(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Sub: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Sub(&a.element, &b.element) + a.element.Sub(&a.element, &b.element) + b.element.Sub(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Sub(&a.element, &b.element) + + var d, e big.Int + d.Sub(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Sub(&a.element, &r) + d.Sub(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _subGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Sub: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Sub(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Sub: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Sub(&a.element, &b.element) + _subGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Sub(&a, &b) + d.Sub(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _subGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Sub failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sub failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementMul(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Mul: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Mul(&a.element, &b.element) + a.element.Mul(&a.element, &b.element) + b.element.Mul(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Mul(&a.element, &b.element) + + var d, e big.Int + d.Mul(&a.bigint, &b.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Mul(&a.element, &r) + d.Mul(&a.bigint, &rb).Mod(&d, Modulus()) + + // checking generic impl against asm path + var cGeneric Element + _mulGeneric(&cGeneric, &a.element, &r) + if !cGeneric.Equal(&c) { + // need to give context to failing error. 
+ return false + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Mul: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Mul(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + properties.Property("Mul: assembly implementation must be consistent with generic one", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + c.Mul(&a.element, &b.element) + _mulGeneric(&d, &a.element, &b.element) + return c.Equal(&d) + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Mul(&a, &b) + d.Mul(&aBig, &bBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _mulGeneric(&cGeneric, &a, &b) + if !cGeneric.Equal(&c) { + t.Fatal("Mul failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Mul failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDiv(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + 
properties.Property("Div: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Div(&a.element, &b.element) + a.element.Div(&a.element, &b.element) + b.element.Div(&d, &b.element) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Div: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Div(&a.element, &b.element) + + var d, e big.Int + d.ModInverse(&b.bigint, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Div(&a.element, &r) + d.ModInverse(&rb, Modulus()) + d.Mul(&d, &a.bigint).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Div: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Div(&a.element, &b.element) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Div(&a, &b) + d.ModInverse(&bBig, Modulus()) + d.Mul(&d, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Div failed special test values") + } + } + 
} + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementExp(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genB := gen() + + properties.Property("Exp: having the receiver as operand should output the same result", prop.ForAll( + func(a, b testPairElement) bool { + var c, d Element + d.Set(&a.element) + + c.Exp(a.element, &b.bigint) + a.element.Exp(a.element, &b.bigint) + b.element.Exp(d, &b.bigint) + + return a.element.Equal(&b.element) && a.element.Equal(&c) && b.element.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must match big.Int result", prop.ForAll( + func(a, b testPairElement) bool { + { + var c Element + + c.Exp(a.element, &b.bigint) + + var d, e big.Int + d.Exp(&a.bigint, &b.bigint, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + + // fixed elements + // a is random + // r takes special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, r := range testValues { + var d, e, rb big.Int + r.ToBigIntRegular(&rb) + + var c Element + c.Exp(a.element, &rb) + d.Exp(&a.bigint, &rb, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + return false + } + } + return true + }, + genA, + genB, + )) + + properties.Property("Exp: operation result must be smaller than modulus", prop.ForAll( + func(a, b testPairElement) bool { + var c Element + + c.Exp(a.element, &b.bigint) + + return !c.biggerOrEqualModulus() + }, + genA, + genB, + )) + + 
specialValueTest := func() { + // test special values against special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + for _, b := range testValues { + + var bBig, d, e big.Int + b.ToBigIntRegular(&bBig) + + var c Element + c.Exp(a, &bBig) + d.Exp(&aBig, &bBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Exp failed special test values") + } + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSquare(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Square: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Square(&a.element) + a.element.Square(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Square: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + + var d, e big.Int + d.Mul(&a.bigint, &a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Square: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Square(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, 
len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Square(&a) + + var d, e big.Int + d.Mul(&aBig, &aBig).Mod(&d, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Square failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementInverse(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Inverse: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Inverse(&a.element) + a.element.Inverse(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Inverse: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + + var d, e big.Int + d.ModInverse(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Inverse: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Inverse(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Inverse(&a) + + var d, e 
big.Int + d.ModInverse(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Inverse failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementSqrt(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Sqrt: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + b := a.element + + b.Sqrt(&a.element) + a.element.Sqrt(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Sqrt: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + + var d, e big.Int + d.ModSqrt(&a.bigint, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Sqrt: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Sqrt(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Sqrt(&a) + + var d, e big.Int + d.ModSqrt(&aBig, Modulus()) + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Sqrt failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + 
specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementDouble(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Double: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Double(&a.element) + a.element.Double(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Double: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + + var d, e big.Int + d.Lsh(&a.bigint, 1).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Double: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Double(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Double: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Double(&a.element) + _doubleGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Double(&a) + + var d, e big.Int + d.Lsh(&aBig, 1).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _doubleGeneric(&cGeneric, &a) + if 
!cGeneric.Equal(&c) { + t.Fatal("Double failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Double failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementNeg(t *testing.T) { + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Neg: having the receiver as operand should output the same result", prop.ForAll( + func(a testPairElement) bool { + + var b Element + + b.Neg(&a.element) + a.element.Neg(&a.element) + return a.element.Equal(&b) + }, + genA, + )) + + properties.Property("Neg: operation result must match big.Int result", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + + var d, e big.Int + d.Neg(&a.bigint).Mod(&d, Modulus()) + + return c.FromMont().ToBigInt(&e).Cmp(&d) == 0 + }, + genA, + )) + + properties.Property("Neg: operation result must be smaller than modulus", prop.ForAll( + func(a testPairElement) bool { + var c Element + c.Neg(&a.element) + return !c.biggerOrEqualModulus() + }, + genA, + )) + + properties.Property("Neg: assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + var c, d Element + c.Neg(&a.element) + _negGeneric(&d, &a.element) + return c.Equal(&d) + }, + genA, + )) + + specialValueTest := func() { + // test special values + testValues := make([]Element, len(staticTestValues)) + copy(testValues, staticTestValues) + + for _, a := range testValues { + 
var aBig big.Int + a.ToBigIntRegular(&aBig) + var c Element + c.Neg(&a) + + var d, e big.Int + d.Neg(&aBig).Mod(&d, Modulus()) + + // checking asm against generic impl + var cGeneric Element + _negGeneric(&cGeneric, &a) + if !cGeneric.Equal(&c) { + t.Fatal("Neg failed special test values: asm and generic impl don't match") + } + + if c.FromMont().ToBigInt(&e).Cmp(&d) != 0 { + t.Fatal("Neg failed special test values") + } + } + } + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + // if we have ADX instruction enabled, test both path in assembly + if supportAdx { + supportAdx = false + t.Log("disabling ADX") + properties.TestingRun(t, gopter.ConsoleReporter(false)) + specialValueTest() + supportAdx = true + } +} + +func TestElementFixedExp(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + var ( + _bLegendreExponentElement *big.Int + _bSqrtExponentElement *big.Int + ) + + _bLegendreExponentElement, _ = new(big.Int).SetString("1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000", 16) + const sqrtExponentElement = "41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228" + _bSqrtExponentElement, _ = new(big.Int).SetString(sqrtExponentElement, 16) + + genA := gen() + + properties.Property(fmt.Sprintf("expBySqrtExp must match Exp(%s)", sqrtExponentElement), prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expBySqrtExp(c) + d.Exp(d, _bSqrtExponentElement) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("expByLegendreExp must match Exp(1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000)", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.expByLegendreExp(c) + d.Exp(d, _bLegendreExponentElement) + return c.Equal(&d) + }, + genA, + )) + + 
properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementHalve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + var twoInv Element + twoInv.SetUint64(2) + twoInv.Inverse(&twoInv) + + properties.Property("z.Halve must match z / 2", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.Halve() + d.Mul(&d, &twoInv) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInt64(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("z.SetInt64 must match z.SetString", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInt64(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, ggen.Int64(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementSetInterface(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + genInt := ggen.Int + genInt8 := ggen.Int8 + genInt16 := ggen.Int16 + genInt32 := ggen.Int32 + genInt64 := ggen.Int64 + + genUint := ggen.UInt + genUint8 := ggen.UInt8 + genUint16 := ggen.UInt16 + genUint32 := ggen.UInt32 + genUint64 := ggen.UInt64 + + properties.Property("z.SetInterface must match z.SetString with int8", prop.ForAll( + func(a testPairElement, v int8) bool { + c := a.element + d := a.element + + 
c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt8(), + )) + + properties.Property("z.SetInterface must match z.SetString with int16", prop.ForAll( + func(a testPairElement, v int16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt16(), + )) + + properties.Property("z.SetInterface must match z.SetString with int32", prop.ForAll( + func(a testPairElement, v int32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt32(), + )) + + properties.Property("z.SetInterface must match z.SetString with int64", prop.ForAll( + func(a testPairElement, v int64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt64(), + )) + + properties.Property("z.SetInterface must match z.SetString with int", prop.ForAll( + func(a testPairElement, v int) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genInt(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint8", prop.ForAll( + func(a testPairElement, v uint8) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint8(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint16", prop.ForAll( + func(a testPairElement, v uint16) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint16(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint32", prop.ForAll( + func(a testPairElement, v uint32) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return 
c.Equal(&d) + }, + genA, genUint32(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint64", prop.ForAll( + func(a testPairElement, v uint64) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint64(), + )) + + properties.Property("z.SetInterface must match z.SetString with uint", prop.ForAll( + func(a testPairElement, v uint) bool { + c := a.element + d := a.element + + c.SetInterface(v) + d.SetString(fmt.Sprintf("%v", v)) + + return c.Equal(&d) + }, + genA, genUint(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementFromMont(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = nbFuzzShort + } else { + parameters.MinSuccessfulTests = nbFuzz + } + + properties := gopter.NewProperties(parameters) + + genA := gen() + + properties.Property("Assembly implementation must be consistent with generic one", prop.ForAll( + func(a testPairElement) bool { + c := a.element + d := a.element + c.FromMont() + _fromMontGeneric(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("x.FromMont().ToMont() == x", prop.ForAll( + func(a testPairElement) bool { + c := a.element + c.FromMont().ToMont() + return c.Equal(&a.element) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestElementJSON(t *testing.T) { + assert := require.New(t) + + type S struct { + A Element + B [3]Element + C *Element + D *Element + } + + // encode to JSON + var s S + s.A.SetString("-1") + s.B[2].SetUint64(42) + s.D = new(Element).SetUint64(8000) + + encoded, err := json.Marshal(&s) + assert.NoError(err) + expected := "{\"A\":-1,\"B\":[0,0,42],\"C\":null,\"D\":8000}" + assert.Equal(string(encoded), expected) + + // decode valid + var decoded S + err = json.Unmarshal([]byte(expected), &decoded) + assert.NoError(err) + + assert.Equal(s, 
decoded, "element -> json -> element round trip failed")

	// decode hex and string values
	withHexValues := "{\"A\":\"-1\",\"B\":[0,\"0x00000\",\"0x2A\"],\"C\":null,\"D\":\"8000\"}"

	var decodedS S
	err = json.Unmarshal([]byte(withHexValues), &decodedS)
	assert.NoError(err)

	assert.Equal(s, decodedS, " json with strings -> element failed")

}

// testPairElement couples a field element with its math/big mirror so that
// property-based tests can check Element arithmetic against big.Int as a
// reference implementation.
type testPairElement struct {
	element Element
	bigint  big.Int
}

// biggerOrEqualModulus reports whether z >= q, comparing limbs from the most
// significant (index 3) down to the least significant (index 0).
func (z *Element) biggerOrEqualModulus() bool {
	if z[3] > qElement[3] {
		return true
	}
	if z[3] < qElement[3] {
		return false
	}

	if z[2] > qElement[2] {
		return true
	}
	if z[2] < qElement[2] {
		return false
	}

	if z[1] > qElement[1] {
		return true
	}
	if z[1] < qElement[1] {
		return false
	}

	return z[0] >= qElement[0]
}

// gen produces a reduced (< q) random element together with its big.Int image,
// rejection-sampling until the candidate is below the modulus. The top limb is
// reduced mod qElement[3]+1 first so rejections are rare.
func gen() gopter.Gen {
	return func(genParams *gopter.GenParameters) *gopter.GenResult {
		var g testPairElement

		g.element = Element{
			genParams.NextUint64(),
			genParams.NextUint64(),
			genParams.NextUint64(),
			genParams.NextUint64(),
		}
		if qElement[3] != ^uint64(0) {
			g.element[3] %= (qElement[3] + 1)
		}

		for g.element.biggerOrEqualModulus() {
			g.element = Element{
				genParams.NextUint64(),
				genParams.NextUint64(),
				genParams.NextUint64(),
				genParams.NextUint64(),
			}
			if qElement[3] != ^uint64(0) {
				g.element[3] %= (qElement[3] + 1)
			}
		}

		g.element.ToBigIntRegular(&g.bigint)
		genResult := gopter.NewGenResult(g, gopter.NoShrinker)
		return genResult
	}
}

// genFull produces an UNREDUCED element in [q, 2q): it samples a reduced
// element then adds q, exercising code paths that must cope with
// non-normalized inputs.
func genFull() gopter.Gen {
	return func(genParams *gopter.GenParameters) *gopter.GenResult {

		genRandomFq := func() Element {
			var g Element

			g = Element{
				genParams.NextUint64(),
				genParams.NextUint64(),
				genParams.NextUint64(),
				genParams.NextUint64(),
			}

			if qElement[3] != ^uint64(0) {
				g[3] %= (qElement[3] + 1)
			}

			for g.biggerOrEqualModulus() {
				g = Element{
					genParams.NextUint64(),
					genParams.NextUint64(),
					genParams.NextUint64(),
					genParams.NextUint64(),
				}
				if qElement[3] != ^uint64(0) {
					g[3] %= (qElement[3] + 1)
				}
			}

			return g
		}
		a := genRandomFq()

		// shift the sample into [q, 2q) by adding the modulus limb-wise
		var carry uint64
		a[0], carry = bits.Add64(a[0], qElement[0], carry)
		a[1], carry = bits.Add64(a[1], qElement[1], carry)
		a[2], carry = bits.Add64(a[2], qElement[2], carry)
		a[3], _ = bits.Add64(a[3], qElement[3], carry)

		genResult := gopter.NewGenResult(a, gopter.NoShrinker)
		return genResult
	}
}

// TestElementInversionApproximation checks the fast 64-bit approximation used
// by the inversion algorithm against a big.Int reference (approximateRef).
func TestElementInversionApproximation(t *testing.T) {
	var x Element
	for i := 0; i < 1000; i++ {
		x.SetRandom()

		// Normally small elements are unlikely. Here we give them a higher chance
		xZeros := mrand.Int() % Limbs
		for j := 1; j < xZeros; j++ {
			x[Limbs-j] = 0
		}

		a := approximate(&x, x.BitLen())
		aRef := approximateRef(&x)

		if a != aRef {
			t.Error("Approximation mismatch")
		}
	}
}

// TestElementInversionCorrectionFactorFormula verifies that the hard-coded
// inversion correction factor equals 2^power mod q, with power derived from
// the limb count and the fixed iteration count of the inversion algorithm.
func TestElementInversionCorrectionFactorFormula(t *testing.T) {
	const kLimbs = k * Limbs
	const power = kLimbs*6 + invIterationsN*(kLimbs-k+1)
	factorInt := big.NewInt(1)
	factorInt.Lsh(factorInt, power)
	factorInt.Mod(factorInt, Modulus())

	var refFactorInt big.Int
	inversionCorrectionFactor := Element{
		inversionCorrectionFactorWord0,
		inversionCorrectionFactorWord1,
		inversionCorrectionFactorWord2,
		inversionCorrectionFactorWord3,
	}
	inversionCorrectionFactor.ToBigInt(&refFactorInt)

	if refFactorInt.Cmp(factorInt) != 0 {
		t.Error("mismatch")
	}
}

// TestElementLinearComb fuzzes linearCombSosSigned with random elements and
// random signed 63-bit coefficients.
func TestElementLinearComb(t *testing.T) {
	var x Element
	var y Element

	for i := 0; i < 1000; i++ {
		x.SetRandom()
		y.SetRandom()
		testLinearComb(t, &x, mrand.Int63(), &y, mrand.Int63())
	}
}

// Probably unnecessary post-dev. In case the output of inv is wrong, this checks whether it's only off by a constant factor.
func TestElementInversionCorrectionFactor(t *testing.T) {

	// (1/x)/inv(x) = (1/1)/inv(1) ⇔ inv(1) = x inv(x)

	var one Element
	var oneInv Element
	one.SetOne()
	oneInv.Inverse(&one)

	for i := 0; i < 100; i++ {
		var x Element
		var xInv Element
		x.SetRandom()
		xInv.Inverse(&x)

		x.Mul(&x, &xInv)
		if !x.Equal(&oneInv) {
			t.Error("Correction factor is inconsistent")
		}
	}

	if !oneInv.Equal(&one) {
		// inversion is consistently off by a constant; report what it is
		var i big.Int
		oneInv.ToBigIntRegular(&i) // no montgomery
		i.ModInverse(&i, Modulus())
		var fac Element
		fac.setBigInt(&i) // back to montgomery

		var facTimesFac Element
		facTimesFac.Mul(&fac, &Element{
			inversionCorrectionFactorWord0,
			inversionCorrectionFactorWord1,
			inversionCorrectionFactorWord2,
			inversionCorrectionFactorWord3,
		})

		t.Error("Correction factor is consistently off by", fac, "Should be", facTimesFac)
	}
}

// TestElementBigNumNeg checks that negating zero yields zero with no borrow.
func TestElementBigNumNeg(t *testing.T) {
	var a Element
	aHi := a.neg(&a, 0)
	if !a.IsZero() || aHi != 0 {
		t.Error("-0 != 0")
	}
}

// TestElementBigNumWMul fuzzes the word-by-element multiplication (mulWRegular).
func TestElementBigNumWMul(t *testing.T) {
	var x Element

	for i := 0; i < 1000; i++ {
		x.SetRandom()
		w := mrand.Int63()
		testBigNumWMul(t, &x, w)
	}
}

// TestElementVeryBigIntConversion round-trips a (Limbs+1)-word signed value
// through toVeryBigIntSigned and the word-level matcher.
func TestElementVeryBigIntConversion(t *testing.T) {
	xHi := mrand.Uint64()
	var x Element
	x.SetRandom()
	var xInt big.Int
	x.toVeryBigIntSigned(&xInt, xHi)
	x.assertMatchVeryBigInt(t, xHi, &xInt)
}

// TestElementMontReducePos exercises montReduceSigned with a non-negative
// high word (sign bit cleared).
func TestElementMontReducePos(t *testing.T) {
	var x Element

	for i := 0; i < 1000; i++ {
		x.SetRandom()
		testMontReduceSigned(t, &x, mrand.Uint64() & ^signBitSelector)
	}
}

// TestElementMontReduceNeg exercises montReduceSigned with a negative high
// word (sign bit set).
func TestElementMontReduceNeg(t *testing.T) {
	var x Element

	for i := 0; i < 1000; i++ {
		x.SetRandom()
		testMontReduceSigned(t, &x, mrand.Uint64()|signBitSelector)
	}
}

// TestElementMontNegMultipleOfR exercises montReduceSigned on values whose
// low limbs are all zero (i.e. negative multiples of R).
func TestElementMontNegMultipleOfR(t *testing.T) {
	var zero Element

	for i := 0; i < 1000; i++ {
		testMontReduceSigned(t, &zero, mrand.Uint64()|signBitSelector)
	}
}

//TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen

// TestUpdateFactorSubtraction checks that subtracting packed update-factor
// pairs matches component-wise subtraction, after clamping the operands so
// neither component difference overflows 32 bits.
func TestUpdateFactorSubtraction(t *testing.T) {
	for i := 0; i < 1000; i++ {

		f0, g0 := randomizeUpdateFactors()
		f1, g1 := randomizeUpdateFactors()

		for f0-f1 > 1<<31 || f0-f1 <= -1<<31 {
			f1 /= 2
		}

		for g0-g1 > 1<<31 || g0-g1 <= -1<<31 {
			g1 /= 2
		}

		c0 := updateFactorsCompose(f0, g0)
		c1 := updateFactorsCompose(f1, g1)

		cRes := c0 - c1
		fRes, gRes := updateFactorsDecompose(cRes)

		if fRes != f0-f1 || gRes != g0-g1 {
			t.Error(i)
		}
	}
}

// TestUpdateFactorsDouble checks that doubling a packed pair doubles both
// components, first shrinking whichever component would overflow.
func TestUpdateFactorsDouble(t *testing.T) {
	for i := 0; i < 1000; i++ {
		f, g := randomizeUpdateFactors()

		if f > 1<<30 || f < (-1<<31+1)/2 {
			f /= 2
			if g <= 1<<29 && g >= (-1<<31+1)/4 {
				g *= 2 //g was kept small on f's account. Now that we're halving f, we can double g
			}
		}

		if g > 1<<30 || g < (-1<<31+1)/2 {
			g /= 2

			if f <= 1<<29 && f >= (-1<<31+1)/4 {
				f *= 2 //f was kept small on g's account. Now that we're halving g, we can double f
			}
		}

		c := updateFactorsCompose(f, g)
		cD := c * 2
		fD, gD := updateFactorsDecompose(cD)

		if fD != 2*f || gD != 2*g {
			t.Error(i)
		}
	}
}

// TestUpdateFactorsNeg checks that negating a packed pair negates both
// components, skipping the un-negatable value -2³¹.
func TestUpdateFactorsNeg(t *testing.T) {
	var fMistake bool
	for i := 0; i < 1000; i++ {
		f, g := randomizeUpdateFactors()

		if f == 0x80000000 || g == 0x80000000 {
			// Update factors this large can only have been obtained after 31 iterations and will therefore never be negated
			// We don't have capacity to store -2³¹
			// Repeat this iteration
			i--
			continue
		}

		c := updateFactorsCompose(f, g)
		nc := -c
		nf, ng := updateFactorsDecompose(nc)
		fMistake = fMistake || nf != -f
		if nf != -f || ng != -g {
			t.Errorf("Mismatch iteration #%d:\n%d, %d ->\n %d -> %d ->\n %d, %d\n Inputs in hex: %X, %X",
				i, f, g, c, nc, nf, ng, f, g)
		}
	}
	if fMistake {
		t.Error("Mistake with f detected")
	} else {
		t.Log("All good with f")
	}
}

// TestUpdateFactorsNeg0 checks that the zero pair is its own negation.
func TestUpdateFactorsNeg0(t *testing.T) {
	c := updateFactorsCompose(0, 0)
	t.Logf("c(0,0) = %X", c)
	cn := -c

	if c != cn {
		t.Error("Negation of zero update factors should yield the same result.")
	}
}

// TestUpdateFactorDecomposition round-trips (f, g) through compose/decompose
// and requires that negative f values actually occur in the sample.
func TestUpdateFactorDecomposition(t *testing.T) {
	var negSeen bool

	for i := 0; i < 1000; i++ {

		f, g := randomizeUpdateFactors()

		if f <= -(1<<31) || f > 1<<31 {
			t.Fatal("f out of range")
		}

		negSeen = negSeen || f < 0

		c := updateFactorsCompose(f, g)

		fBack, gBack := updateFactorsDecompose(c)

		if f != fBack || g != gBack {
			t.Errorf("(%d, %d) -> %d -> (%d, %d)\n", f, g, c, fBack, gBack)
		}
	}

	if !negSeen {
		t.Fatal("No negative f factors")
	}
}

// TestUpdateFactorInitialValues checks the packed identity-matrix constants
// decompose to rows (1,0) and (0,1).
func TestUpdateFactorInitialValues(t *testing.T) {

	f0, g0 := updateFactorsDecompose(updateFactorIdentityMatrixRow0)
	f1, g1 := updateFactorsDecompose(updateFactorIdentityMatrixRow1)

	if f0 != 1 || g0 != 0 || f1 != 0 || g1 != 1 {
		t.Error("Update factor initial value constants are incorrect")
	}
}

// TestUpdateFactorsRandomization sanity-checks the randomizer's invariant
// |f| + |g| <= 2³¹, and that the boundary is actually reached.
func TestUpdateFactorsRandomization(t *testing.T) {
	var maxLen int

	//t.Log("|f| + |g| is not to exceed", 1 << 31)
	for i := 0; i < 1000; i++ {
		f, g := randomizeUpdateFactors()
		lf, lg := abs64T32(f), abs64T32(g)
		absSum := lf + lg
		if absSum >= 1<<31 {

			if absSum == 1<<31 {
				maxLen++
			} else {
				t.Error(i, "Sum of absolute values too large, f =", f, ",g =", g, ",|f| + |g| =", absSum)
			}
		}
	}

	if maxLen == 0 {
		t.Error("max len not observed")
	} else {
		t.Log(maxLen, "maxLens observed")
	}
}

// randomizeUpdateFactor returns a random value in [-absLimit, absLimit],
// deliberately over-weighting the two extremes, clamped to (-2³¹, 2³¹].
func randomizeUpdateFactor(absLimit uint32) int64 {
	const maxSizeLikelihood = 10
	maxSize := mrand.Intn(maxSizeLikelihood)

	absLimit64 := int64(absLimit)
	var f int64
	switch maxSize {
	case 0:
		f = absLimit64
	case 1:
		f = -absLimit64
	default:
		f = int64(mrand.Uint64()%(2*uint64(absLimit64)+1)) - absLimit64
	}

	if f > 1<<31 {
		return 1 << 31
	} else if f < -1<<31+1 {
		return -1<<31 + 1
	}

	return f
}

// abs64T32 returns |f| as a uint32; panics if |f| does not fit in 32 bits.
func abs64T32(f int64) uint32 {
	if f >= 1<<32 || f < -1<<32 {
		panic("f out of range")
	}

	if f < 0 {
		return uint32(-f)
	}
	return uint32(f)
}

// randomizeUpdateFactors returns a random pair (f, g) with |f| + |g| <= 2³¹
// (per the Pornin/BY inversion paper bound), avoiding the edge case where
// the components sum to exactly -2³¹.
func randomizeUpdateFactors() (int64, int64) {
	var f [2]int64
	b := mrand.Int() % 2

	f[b] = randomizeUpdateFactor(1 << 31)

	//As per the paper, |f| + |g| \le 2³¹.
	f[1-b] = randomizeUpdateFactor(1<<31 - abs64T32(f[b]))

	//Patching another edge case
	if f[0]+f[1] == -1<<31 {
		b = mrand.Int() % 2
		f[b]++
	}

	return f[0], f[1]
}

// testLinearComb checks z = xC·x + yC·y against a big.Int reference, with the
// Montgomery reduction applied to the reference to match the SOS output.
func testLinearComb(t *testing.T, x *Element, xC int64, y *Element, yC int64) {

	var p1 big.Int
	x.ToBigInt(&p1)
	p1.Mul(&p1, big.NewInt(xC))

	var p2 big.Int
	y.ToBigInt(&p2)
	p2.Mul(&p2, big.NewInt(yC))

	p1.Add(&p1, &p2)
	p1.Mod(&p1, Modulus())
	montReduce(&p1, &p1)

	var z Element
	z.linearCombSosSigned(x, xC, y, yC)
	z.assertMatchVeryBigInt(t, 0, &p1)
}

// testBigNumWMul checks the (Limbs+1)-word product a·c against big.Int.
func testBigNumWMul(t *testing.T, a *Element, c int64) {
	var aHi uint64
	var aTimes Element
	aHi = aTimes.mulWRegular(a, c)

	assertMulProduct(t, a, c, &aTimes, aHi)
}

// testMontReduceSigned checks montReduceSigned of the signed (Limbs+1)-word
// value (xHi, x) against a big.Int reference reduction.
func testMontReduceSigned(t *testing.T, x *Element, xHi uint64) {
	var res Element
	var xInt big.Int
	var resInt big.Int
	x.toVeryBigIntSigned(&xInt, xHi)
	res.montReduceSigned(x, xHi)
	montReduce(&resInt, &xInt)
	res.assertMatchVeryBigInt(t, 0, &resInt)
}

// updateFactorsCompose packs (f, g) into a single int64: f in the low 32
// bits, g in the high 32 bits.
func updateFactorsCompose(f int64, g int64) int64 {
	return f + g<<32
}

// rInv caches R⁻¹ mod q for the reference Montgomery reduction below.
var rInv big.Int

// montReduce computes res = x·R⁻¹ mod q using big.Int, lazily initializing
// rInv on first use.
func montReduce(res *big.Int, x *big.Int) {
	if rInv.BitLen() == 0 { // initialization
		rInv.SetUint64(1)
		rInv.Lsh(&rInv, Limbs*64)
		rInv.ModInverse(&rInv, Modulus())
	}
	res.Mul(x, &rInv)
	res.Mod(res, Modulus())
}

// toVeryBigIntUnsigned interprets (xHi, z) as an unsigned (Limbs+1)-word
// integer with xHi as the top word.
func (z *Element) toVeryBigIntUnsigned(i *big.Int, xHi uint64) {
	z.ToBigInt(i)
	var upperWord big.Int
	upperWord.SetUint64(xHi)
	upperWord.Lsh(&upperWord, Limbs*64)
	i.Add(&upperWord, i)
}

// toVeryBigIntSigned interprets (xHi, z) as a two's-complement
// (Limbs+1)-word integer: if xHi's sign bit is set, 2^((Limbs+1)·64) is
// subtracted to recover the negative value.
func (z *Element) toVeryBigIntSigned(i *big.Int, xHi uint64) {
	z.toVeryBigIntUnsigned(i, xHi)
	if signBitSelector&xHi != 0 {
		twosCompModulus := big.NewInt(1)
		twosCompModulus.Lsh(twosCompModulus, (Limbs+1)*64)
		i.Sub(i, twosCompModulus)
	}
}

// assertMulProduct checks that (resultHi, result) equals x·c as a wide
// integer, and returns the reference product for further use.
func assertMulProduct(t *testing.T, x *Element, c int64, result *Element, resultHi uint64) big.Int {
	var xInt big.Int
	x.ToBigInt(&xInt)

	xInt.Mul(&xInt, big.NewInt(c))

	result.assertMatchVeryBigInt(t, resultHi, &xInt)
	return xInt
}

// assertMatch compares one machine word of a against the index-th big.Word
// of w, handling both 32- and 64-bit big.Word sizes.
func assertMatch(t *testing.T, w []big.Word, a uint64, index int) {

	var wI big.Word

	if index < len(w) {
		wI = w[index]
	}

	const filter uint64 = 0xFFFFFFFFFFFFFFFF >> (64 - bits.UintSize)

	a = a >> ((index * bits.UintSize) % 64)
	a &= filter

	if uint64(wI) != a {
		t.Error("Bignum mismatch: disagreement on word", index)
	}
}

// assertMatchVeryBigInt checks that the two's-complement value (aHi, z)
// equals aInt modulo 2^((Limbs+1)·64), word by word.
func (z *Element) assertMatchVeryBigInt(t *testing.T, aHi uint64, aInt *big.Int) {

	var modulus big.Int
	var aIntMod big.Int
	modulus.SetInt64(1)
	modulus.Lsh(&modulus, (Limbs+1)*64)
	aIntMod.Mod(aInt, &modulus)

	words := aIntMod.Bits()

	const steps = 64 / bits.UintSize
	for i := 0; i < Limbs*steps; i++ {
		assertMatch(t, words, z[i/steps], i)
	}

	for i := 0; i < steps; i++ {
		assertMatch(t, words, aHi, Limbs*steps+i)
	}
}

// approximateRef is the big.Int reference for approximate(): it keeps the
// low 31 bits and the high 33 bits of x (when x exceeds 64 bits), packed
// into a single uint64.
func approximateRef(x *Element) uint64 {

	var asInt big.Int
	x.ToBigInt(&asInt)
	n := x.BitLen()

	if n <= 64 {
		return asInt.Uint64()
	}

	modulus := big.NewInt(1 << 31)
	var lo big.Int
	lo.Mod(&asInt, modulus)

	modulus.Lsh(modulus, uint(n-64))
	var hi big.Int
	hi.Div(&asInt, modulus)
	hi.Lsh(&hi, 31)

	hi.Add(&hi, &lo)
	return hi.Uint64()
}

// ----- ecc/bls12-378/fr/fft/doc.go -----

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by consensys/gnark-crypto DO NOT EDIT

// Package fft provides in-place discrete Fourier transform.
package fft

// ----- ecc/bls12-378/fr/fft/domain.go -----

import (
	"fmt"
	"io"
	"math/big"
	"math/bits"
	"runtime"
	"sync"

	"github.com/consensys/gnark-crypto/ecc"
)

// NOTE(review): this file references fr.Element and curve.NewEncoder/NewDecoder
// but the visible import block contains neither the fr nor the curve package —
// verify the generated imports were not lost in transit.

// Domain with a power of 2 cardinality
// compute a field element of order 2x and store it in FinerGenerator
// all other values can be derived from x, GeneratorSqrt
type Domain struct {
	Cardinality             uint64
	Depth                   uint64
	PrecomputeReversedTable uint64 // uint64 so it is recognized by the decoder from gnark-crypto
	CardinalityInv          fr.Element
	Generator               fr.Element
	GeneratorInv            fr.Element
	FinerGenerator          fr.Element
	FinerGeneratorInv       fr.Element

	// the following slices are not serialized and are (re)computed through domain.preComputeTwiddles()

	// Twiddles factor for the FFT using Generator for each stage of the recursive FFT
	Twiddles [][]fr.Element

	// Twiddles factor for the FFT using GeneratorInv for each stage of the recursive FFT
	TwiddlesInv [][]fr.Element

	// we precompute these mostly to avoid the memory intensive bit reverse permutation in the groth16.Prover

	// CosetTable[i][j] = domain.Generator(i-th)Sqrt ^ j
	// CosetTable = fft.BitReverse(CosetTable)
	CosetTable         [][]fr.Element
	CosetTableReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain

	// CosetTable[i][j] = domain.Generator(i-th)SqrtInv ^ j
	// CosetTableInv = fft.BitReverse(CosetTableInv)
	CosetTableInv         [][]fr.Element
	CosetTableInvReversed [][]fr.Element // optional, this is computed on demand at the creation of the domain
}

// NewDomain returns a subgroup with a power of 2 cardinality
// cardinality >= m
// If depth>0, the Domain will also store a primitive (2**depth)*m root
// of 1, with associated precomputed data. This allows to perform shifted
// FFT/FFTInv.
// If precomputeReversedCosetTable is set, the bit reversed cosetTable/cosetTableInv are precomputed.
//
// example:
// --------
//
// * NewDomain(m, 0, false) outputs a new domain to perform the fft on Z/mZ.
// * NewDomain(m, 2, false) outputs a new domain to perform fft on Z/mZ, plus a primitive
//   2**2*m=4m-th root of 1 and associated data to compute fft/fftinv on the cosets of
//   (Z/4mZ)/(Z/mZ).
func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain {

	// generator of the largest 2-adic subgroup
	//
	// NOTE(review): rootOfUnity is the zero value here and is never assigned
	// before being used as the base of Exp below — in generated gnark-crypto
	// code a SetString initialization normally follows this declaration.
	// Verify that line was not lost; as written, all generators would be zero.
	var rootOfUnity fr.Element

	domain := &Domain{}
	x := ecc.NextPowerOfTwo(m)
	domain.Cardinality = uint64(x)
	domain.Depth = depth
	if precomputeReversedTable {
		domain.PrecomputeReversedTable = 1
	}

	// find generator for Z/2^(log(m))Z and Z/2^(log(m)+cosets)Z
	logx := uint64(bits.TrailingZeros64(x))
	if logx > maxOrderRoot {
		panic(fmt.Sprintf("m (%d) is too big: the required root of unity does not exist", m))
	}
	logGen := logx + depth
	if logGen > maxOrderRoot {
		panic("log(m) + cosets is too big: the required root of unity does not exist")
	}

	// FinerGenerator has order 2^logGen = (2^depth)·x
	expo := uint64(1 << (maxOrderRoot - logGen))
	bExpo := new(big.Int).SetUint64(expo)
	domain.FinerGenerator.Exp(rootOfUnity, bExpo)
	domain.FinerGeneratorInv.Inverse(&domain.FinerGenerator)

	// Generator = FinerGenerator^2 has order x
	expo = uint64(1 << (maxOrderRoot - logx))
	bExpo.SetUint64(expo)
	domain.Generator.Exp(rootOfUnity, bExpo) // order x
	domain.GeneratorInv.Inverse(&domain.Generator)
	domain.CardinalityInv.SetUint64(uint64(x)).Inverse(&domain.CardinalityInv)

	// twiddle factors
	domain.preComputeTwiddles()

	// store the bit reversed coset tables if needed
	if depth > 0 && precomputeReversedTable {
		domain.reverseCosetTables()
	}

	return domain
}

// reverseCosetTables copies CosetTable/CosetTableInv and applies the
// bit-reversal permutation to the copies, leaving the originals intact.
func (d *Domain) reverseCosetTables() {
	nbCosets := (1 << d.Depth) - 1
	d.CosetTableReversed = make([][]fr.Element, nbCosets)
	d.CosetTableInvReversed = make([][]fr.Element, nbCosets)
	for i := 0; i < nbCosets; i++ {
		d.CosetTableReversed[i] = make([]fr.Element, d.Cardinality)
		d.CosetTableInvReversed[i] = make([]fr.Element, d.Cardinality)
		copy(d.CosetTableReversed[i], d.CosetTable[i])
		copy(d.CosetTableInvReversed[i], d.CosetTableInv[i])
		BitReverse(d.CosetTableReversed[i])
		BitReverse(d.CosetTableInvReversed[i])
	}
}

// preComputeTwiddles (re)computes the per-stage twiddle factors and the coset
// exponentiation tables. Work is fanned out over goroutines; the WaitGroup is
// sized as 2 twiddle computations + 2 exp tables per coset, with the final
// inverse table computed on the calling goroutine.
func (d *Domain) preComputeTwiddles() {

	// nb fft stages
	nbStages := uint64(bits.TrailingZeros64(d.Cardinality))
	nbCosets := (1 << d.Depth) - 1

	d.Twiddles = make([][]fr.Element, nbStages)
	d.TwiddlesInv = make([][]fr.Element, nbStages)
	d.CosetTable = make([][]fr.Element, nbCosets)
	d.CosetTableInv = make([][]fr.Element, nbCosets)
	for i := 0; i < nbCosets; i++ {
		d.CosetTable[i] = make([]fr.Element, d.Cardinality)
		d.CosetTableInv[i] = make([]fr.Element, d.Cardinality)
	}

	var wg sync.WaitGroup

	// for each fft stage, we pre compute the twiddle factors
	// stage i's generator is the square of stage i-1's (taken from t[i-1][2])
	twiddles := func(t [][]fr.Element, omega fr.Element) {
		for i := uint64(0); i < nbStages; i++ {
			t[i] = make([]fr.Element, 1+(1<<(nbStages-i-1)))
			var w fr.Element
			if i == 0 {
				w = omega
			} else {
				w = t[i-1][2]
			}
			t[i][0] = fr.One()
			t[i][1] = w
			for j := 2; j < len(t[i]); j++ {
				t[i][j].Mul(&t[i][j-1], &w)
			}
		}
		wg.Done()
	}

	expTable := func(sqrt fr.Element, t []fr.Element) {
		t[0] = fr.One()
		precomputeExpTable(sqrt, t)
		wg.Done()
	}

	if nbCosets > 0 {
		// coset generators are consecutive powers of FinerGenerator(Inv)
		cosetGens := make([]fr.Element, nbCosets)
		cosetGensInv := make([]fr.Element, nbCosets)
		cosetGens[0].Set(&d.FinerGenerator)
		cosetGensInv[0].Set(&d.FinerGeneratorInv)
		for i := 1; i < nbCosets; i++ {
			cosetGens[i].Mul(&cosetGens[i-1], &d.FinerGenerator)
			cosetGensInv[i].Mul(&cosetGensInv[i-1], &d.FinerGeneratorInv)
		}
		wg.Add(2 + 2*nbCosets)
		go twiddles(d.Twiddles, d.Generator)
		go twiddles(d.TwiddlesInv, d.GeneratorInv)
		for i := 0; i < nbCosets-1; i++ {
			go expTable(cosetGens[i], d.CosetTable[i])
			go expTable(cosetGensInv[i], d.CosetTableInv[i])
		}
		go expTable(cosetGens[nbCosets-1], d.CosetTable[nbCosets-1])
		expTable(cosetGensInv[nbCosets-1], d.CosetTableInv[nbCosets-1])

		wg.Wait()

	} else {
		wg.Add(2)
		go twiddles(d.Twiddles, d.Generator)
		twiddles(d.TwiddlesInv, d.GeneratorInv)
		wg.Wait()
	}

}

// precomputeExpTable fills table[1:] with successive powers of w
// (table[i] = w^i), parallelizing in chunks when the table is large enough
// for the Exp-per-chunk overhead to pay off.
func precomputeExpTable(w fr.Element, table []fr.Element) {
	n := len(table)

	// see if it makes sense to parallelize exp tables pre-computation
	interval := 0
	if runtime.NumCPU() >= 4 {
		interval = (n - 1) / (runtime.NumCPU() / 4)
	}

	// this ratio roughly correspond to the number of multiplication one can do in place of a Exp operation
	const ratioExpMul = 6000 / 17

	if interval < ratioExpMul {
		precomputeExpTableChunk(w, 1, table[1:])
		return
	}

	// we parallelize
	var wg sync.WaitGroup
	for i := 1; i < n; i += interval {
		start := i
		end := i + interval
		if end > n {
			end = n
		}
		wg.Add(1)
		go func() {
			precomputeExpTableChunk(w, uint64(start), table[start:end])
			wg.Done()
		}()
	}
	wg.Wait()
}

// precomputeExpTableChunk fills table with w^power, w^(power+1), ... —
// one Exp to seed the chunk, then one Mul per entry.
func precomputeExpTableChunk(w fr.Element, power uint64, table []fr.Element) {

	// this condition ensures that creating a domain of size 1 with cosets don't fail
	if len(table) > 0 {
		table[0].Exp(w, new(big.Int).SetUint64(power))
		for i := 1; i < len(table); i++ {
			table[i].Mul(&table[i-1], &w)
		}
	}
}

// WriteTo writes a binary representation of the domain (without the precomputed twiddle factors)
// to the provided writer
func (d *Domain) WriteTo(w io.Writer) (int64, error) {

	enc := curve.NewEncoder(w)

	toEncode := []interface{}{d.Cardinality, d.Depth, d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv}

	for _, v := range toEncode {
		if err := enc.Encode(v); err != nil {
			return enc.BytesWritten(), err
		}
	}

	return enc.BytesWritten(), nil
}

// ReadFrom attempts to decode a domain from Reader; the twiddle factors and
// (if flagged) the reversed coset tables are recomputed rather than read.
func (d *Domain) ReadFrom(r io.Reader) (int64, error) {

	dec := curve.NewDecoder(r)

	toDecode := []interface{}{&d.Cardinality, &d.Depth, &d.PrecomputeReversedTable, &d.CardinalityInv, &d.Generator, &d.GeneratorInv, &d.FinerGenerator, &d.FinerGeneratorInv}

	for _, v := range toDecode {
		if err := dec.Decode(v); err != nil {
			return dec.BytesRead(), err
		}
	}

	d.preComputeTwiddles()

	// store the bit reversed coset tables if needed
	if d.Depth > 0 && d.PrecomputeReversedTable == 1 {
		d.reverseCosetTables()
	}

	return dec.BytesRead(), nil
}

// ----- ecc/bls12-378/fr/fft/domain_test.go -----

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "bytes" + "reflect" + "testing" +) + +func TestDomainSerialization(t *testing.T) { + + domain := NewDomain(1<<6, 1, true) + var reconstructed Domain + + var buf bytes.Buffer + written, err := domain.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + var read int64 + read, err = reconstructed.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + if written != read { + t.Fatal("didn't read as many bytes as we wrote") + } + if !reflect.DeepEqual(domain, &reconstructed) { + t.Fatal("Domain.SetBytes(Bytes()) failed") + } +} diff --git a/ecc/bls12-378/fr/fft/fft.go b/ecc/bls12-378/fr/fft/fft.go new file mode 100644 index 000000000..66f299d78 --- /dev/null +++ b/ecc/bls12-378/fr/fft/fft.go @@ -0,0 +1,318 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Code generated by consensys/gnark-crypto DO NOT EDIT

package fft

import (
	"math/bits"
	"runtime"

	"github.com/consensys/gnark-crypto/ecc"
	"github.com/consensys/gnark-crypto/internal/parallel"
)

// NOTE(review): this file uses fr.Element / fr.Butterfly but the visible
// import block has no fr import — verify the generated imports were not lost
// in transit.

// Decimation is used in the FFT call to select decimation in time or in frequency
type Decimation uint8

const (
	DIT Decimation = iota
	DIF
)

// parallelize threshold for a single butterfly op, if the fft stage is not parallelized already
const butterflyThreshold = 16

// FFT computes (recursively) the discrete Fourier transform of a and stores the result in a
// if decimation == DIT (decimation in time), the input must be in bit-reversed order
// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order
// coset sets the shift of the fft (0 = no shift, standard fft)
// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F.
//
// example:
// -------
// domain := NewDomain(m, 2, false) --> contains precomputed data for Z/mZ, and Z/4mZ
// FFT(pol, DIT, 1) --> evaluates pol on the coset 1 in (Z/4mZ)/(Z/mZ)
func (domain *Domain) FFT(a []fr.Element, decimation Decimation, coset uint64) {

	numCPU := uint64(runtime.NumCPU())

	// if coset != 0, scale by coset table
	if coset != 0 {
		scale := func(cosetTable []fr.Element) {
			parallel.Execute(len(a), func(start, end int) {
				for i := start; i < end; i++ {
					a[i].Mul(&a[i], &cosetTable[i])
				}
			})
		}
		if decimation == DIT {
			if domain.PrecomputeReversedTable == 0 {
				// no precomputed coset, we adjust the index of the coset table
				// (DIT expects bit-reversed input, so index the plain table
				// through the bit-reversal permutation)
				n := uint64(len(a))
				nn := uint64(64 - bits.TrailingZeros64(n))
				parallel.Execute(len(a), func(start, end int) {
					for i := start; i < end; i++ {
						irev := bits.Reverse64(uint64(i)) >> nn
						a[i].Mul(&a[i], &domain.CosetTable[coset-1][int(irev)])
					}
				})
			} else {
				scale(domain.CosetTableReversed[coset-1])
			}
		} else {
			scale(domain.CosetTable[coset-1])
		}
	}

	// find the stage where we should stop spawning go routines in our recursive calls
	// (ie when we have as many go routines running as we have available CPUs)
	maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU))
	if numCPU <= 1 {
		maxSplits = -1
	}

	switch decimation {
	case DIF:
		difFFT(a, domain.Twiddles, 0, maxSplits, nil)
	case DIT:
		ditFFT(a, domain.Twiddles, 0, maxSplits, nil)
	default:
		panic("not implemented")
	}
}

// FFTInverse computes (recursively) the inverse discrete Fourier transform of a and stores the result in a
// if decimation == DIT (decimation in time), the input must be in bit-reversed order
// if decimation == DIF (decimation in frequency), the output will be in bit-reversed order
// coset sets the shift of the fft (0 = no shift, standard fft)
// len(a) must be a power of 2, and w must be a len(a)th root of unity in field F.
func (domain *Domain) FFTInverse(a []fr.Element, decimation Decimation, coset uint64) {

	numCPU := uint64(runtime.NumCPU())

	// find the stage where we should stop spawning go routines in our recursive calls
	// (ie when we have as many go routines running as we have available CPUs)
	maxSplits := bits.TrailingZeros64(ecc.NextPowerOfTwo(numCPU))
	if numCPU <= 1 {
		maxSplits = -1
	}
	switch decimation {
	case DIF:
		difFFT(a, domain.TwiddlesInv, 0, maxSplits, nil)
	case DIT:
		ditFFT(a, domain.TwiddlesInv, 0, maxSplits, nil)
	default:
		panic("not implemented")
	}

	// scale by CardinalityInv (+ cosetTableInv is coset!=0)
	if coset == 0 {
		parallel.Execute(len(a), func(start, end int) {
			for i := start; i < end; i++ {
				a[i].Mul(&a[i], &domain.CardinalityInv)
			}
		})
		return
	}

	scale := func(cosetTable []fr.Element) {
		parallel.Execute(len(a), func(start, end int) {
			for i := start; i < end; i++ {
				a[i].Mul(&a[i], &cosetTable[i]).
					Mul(&a[i], &domain.CardinalityInv)
			}
		})
	}
	if decimation == DIT {
		scale(domain.CosetTableInv[coset-1])
		return
	}

	// decimation == DIF
	if domain.PrecomputeReversedTable != 0 {
		scale(domain.CosetTableInvReversed[coset-1])
		return
	}

	// no precomputed coset, we adjust the index of the coset table
	// (DIF output is bit-reversed, so index the plain inverse table through
	// the bit-reversal permutation)
	n := uint64(len(a))
	nn := uint64(64 - bits.TrailingZeros64(n))
	parallel.Execute(len(a), func(start, end int) {
		for i := start; i < end; i++ {
			irev := bits.Reverse64(uint64(i)) >> nn
			a[i].Mul(&a[i], &domain.CosetTableInv[coset-1][int(irev)]).
				Mul(&a[i], &domain.CardinalityInv)
		}
	})

}

// difFFT is the recursive decimation-in-frequency kernel: butterflies first
// (with stage twiddles), then recursion on the two halves. chDone, when
// non-nil, is closed on return so a parent goroutine can wait on this call.
func difFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) {
	if chDone != nil {
		defer close(chDone)
	}

	n := len(a)
	if n == 1 {
		return
	} else if n == 8 {
		// size-8 base case handled by an unrolled kernel
		kerDIF8(a, twiddles, stage)
		return
	}
	m := n >> 1

	// if stage < maxSplits, we parallelize this butterfly
	// but we have only numCPU / stage cpus available
	if (m > butterflyThreshold) && (stage < maxSplits) {
		// 1 << stage == estimated used CPUs
		numCPU := runtime.NumCPU() / (1 << (stage))
		parallel.Execute(m, func(start, end int) {
			for i := start; i < end; i++ {
				fr.Butterfly(&a[i], &a[i+m])
				a[i+m].Mul(&a[i+m], &twiddles[stage][i])
			}
		}, numCPU)
	} else {
		// i == 0 (twiddle is 1, no multiplication needed)
		fr.Butterfly(&a[0], &a[m])
		for i := 1; i < m; i++ {
			fr.Butterfly(&a[i], &a[i+m])
			a[i+m].Mul(&a[i+m], &twiddles[stage][i])
		}
	}

	if m == 1 {
		return
	}

	nextStage := stage + 1
	if stage < maxSplits {
		// spawn one goroutine for the upper half, recurse on the lower half
		chDone := make(chan struct{}, 1)
		go difFFT(a[m:n], twiddles, nextStage, maxSplits, chDone)
		difFFT(a[0:m], twiddles, nextStage, maxSplits, nil)
		<-chDone
	} else {
		difFFT(a[0:m], twiddles, nextStage, maxSplits, nil)
		difFFT(a[m:n], twiddles, nextStage, maxSplits, nil)
	}

}

// ditFFT is the recursive decimation-in-time kernel: recursion on the two
// halves first, then twiddle-multiplied butterflies. chDone, when non-nil,
// is closed on return so a parent goroutine can wait on this call.
func ditFFT(a []fr.Element, twiddles [][]fr.Element, stage, maxSplits int, chDone chan struct{}) {
	if chDone != nil {
		defer close(chDone)
	}
	n := len(a)
	if n == 1 {
		return
	} else if n == 8 {
		// size-8 base case handled by an unrolled kernel
		kerDIT8(a, twiddles, stage)
		return
	}
	m := n >> 1

	nextStage := stage + 1

	if stage < maxSplits {
		// that's the only time we fire go routines
		chDone := make(chan struct{}, 1)
		go ditFFT(a[m:], twiddles, nextStage, maxSplits, chDone)
		ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil)
		<-chDone
	} else {
		ditFFT(a[0:m], twiddles, nextStage, maxSplits, nil)
		ditFFT(a[m:n], twiddles, nextStage, maxSplits, nil)

	}

	// if stage < maxSplits, we parallelize this butterfly
	// but we have only numCPU / stage cpus available
	if (m > butterflyThreshold) && (stage < maxSplits) {
		// 1 << stage == estimated used CPUs
		numCPU := runtime.NumCPU() / (1 << (stage))
		parallel.Execute(m, func(start, end int) {
			for k := start; k < end; k++ {
				a[k+m].Mul(&a[k+m], &twiddles[stage][k])
				fr.Butterfly(&a[k], &a[k+m])
			}
		}, numCPU)

	} else {
		// k == 0 (twiddle is 1, no multiplication needed)
		fr.Butterfly(&a[0], &a[m])
		for k := 1; k < m; k++ {
			a[k+m].Mul(&a[k+m], &twiddles[stage][k])
			fr.Butterfly(&a[k], &a[k+m])
		}
	}
}

// BitReverse applies the bit-reversal permutation to a.
// len(a) must be a power of 2 (as in every single function in this file)
func BitReverse(a []fr.Element) {
	n := uint64(len(a))
	nn := uint64(64 - bits.TrailingZeros64(n))

	for i := uint64(0); i < n; i++ {
		irev := bits.Reverse64(i) >> nn
		// swap each pair only once (when irev > i)
		if irev > i {
			a[i], a[irev] = a[irev], a[i]
		}
	}
}

// kerDIT8 is a kernel that process a FFT of size 8
// (three fully unrolled decimation-in-time stages)
func kerDIT8(a []fr.Element, twiddles [][]fr.Element, stage int) {

	fr.Butterfly(&a[0], &a[1])
	fr.Butterfly(&a[2], &a[3])
	fr.Butterfly(&a[4], &a[5])
	fr.Butterfly(&a[6], &a[7])
	fr.Butterfly(&a[0], &a[2])
	a[3].Mul(&a[3], &twiddles[stage+1][1])
	fr.Butterfly(&a[1], &a[3])
	fr.Butterfly(&a[4], &a[6])
	a[7].Mul(&a[7], &twiddles[stage+1][1])
	fr.Butterfly(&a[5], &a[7])
	fr.Butterfly(&a[0], &a[4])
	a[5].Mul(&a[5], &twiddles[stage+0][1])
	fr.Butterfly(&a[1], &a[5])
	a[6].Mul(&a[6], &twiddles[stage+0][2])
	fr.Butterfly(&a[2], &a[6])
	a[7].Mul(&a[7], &twiddles[stage+0][3])
	fr.Butterfly(&a[3], &a[7])
}

// kerDIF8 is a kernel that process a FFT of size 8
// (three fully unrolled decimation-in-frequency stages)
func kerDIF8(a []fr.Element, twiddles [][]fr.Element, stage int) {

	fr.Butterfly(&a[0], &a[4])
	fr.Butterfly(&a[1], &a[5])
	fr.Butterfly(&a[2], &a[6])
	fr.Butterfly(&a[3], &a[7])
	a[5].Mul(&a[5], &twiddles[stage+0][1])
	a[6].Mul(&a[6], &twiddles[stage+0][2])
	a[7].Mul(&a[7], &twiddles[stage+0][3])
	fr.Butterfly(&a[0], &a[2])
	fr.Butterfly(&a[1], &a[3])
	fr.Butterfly(&a[4], &a[6])
	fr.Butterfly(&a[5], &a[7])
	a[3].Mul(&a[3], &twiddles[stage+1][1])
	a[7].Mul(&a[7], &twiddles[stage+1][1])
	fr.Butterfly(&a[0], &a[1])
	fr.Butterfly(&a[2], &a[3])
	fr.Butterfly(&a[4], &a[5])
	fr.Butterfly(&a[6], &a[7])
}

// ----- ecc/bls12-378/fr/fft/fft_test.go -----

// Copyright 2020 ConsenSys Software Inc.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "math/big" + "strconv" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/gen" + "github.com/leanovate/gopter/prop" +) + +func TestFFT(t *testing.T) { + const maxSize = 1 << 10 + + nbCosets := 3 + domainWithPrecompute := NewDomain(maxSize, 2, true) + domainWOPrecompute := NewDomain(maxSize, 2, false) + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 5 + + properties := gopter.NewProperties(parameters) + + properties.Property("DIF FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 0) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets with precomputed values should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is 
consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). + Mul(&sample, &domainWithPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIF FFT on cosets W/O precompute should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFT(pol, DIF, 1) + BitReverse(pol) + + sample := domainWOPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))). 
+ Mul(&sample, &domainWOPrecompute.FinerGenerator) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("DIT FFT should be consistent with dual basis", prop.ForAll( + + // checks that a random evaluation of a dual function eval(gen**ithpower) is consistent with the FFT result + func(ithpower int) bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + + sample := domainWithPrecompute.Generator + sample.Exp(sample, big.NewInt(int64(ithpower))) + + eval := evaluatePolynomial(backupPol, sample) + + return eval.Equal(&pol[ithpower]) + + }, + gen.IntRange(0, maxSize-1), + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, 0) + domainWithPrecompute.FFTInverse(pol, DIF, 0) + BitReverse(pol) + + check := true + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + return check + }, + )) + + properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWithPrecompute.FFT(pol, DIT, uint64(i)) + domainWithPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + 
+ properties.Property("bitReverse(DIF FFT(DIT FFT (bitReverse))))==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + check := true + + for i := 1; i <= nbCosets; i++ { + + BitReverse(pol) + domainWOPrecompute.FFT(pol, DIT, uint64(i)) + domainWOPrecompute.FFTInverse(pol, DIF, uint64(i)) + BitReverse(pol) + + for i := 0; i < len(pol); i++ { + check = check && pol[i].Equal(&backupPol[i]) + } + } + + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 0) + domainWithPrecompute.FFT(pol, DIT, 0) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, with precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWithPrecompute.FFTInverse(pol, DIF, 1) + domainWithPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.Property("DIT FFT(DIF FFT)==id on cosets, without precomputed values", prop.ForAll( + + func() bool { + + pol := make([]fr.Element, maxSize) + backupPol := make([]fr.Element, maxSize) + + for i := 0; i < maxSize; i++ { + pol[i].SetRandom() + } + copy(backupPol, pol) + + domainWOPrecompute.FFTInverse(pol, DIF, 1) + domainWOPrecompute.FFT(pol, DIT, 1) + + check := true + for i := 0; i < len(pol); i++ { + check = 
check && (pol[i] == backupPol[i]) + } + return check + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// -------------------------------------------------------------------- +// benches +func BenchmarkBitReverse(b *testing.B) { + + const maxSize = 1 << 20 + + pol := make([]fr.Element, maxSize) + pol[0].SetRandom() + for i := 1; i < maxSize; i++ { + pol[i] = pol[i-1] + } + + for i := 8; i < 20; i++ { + b.Run("bit reversing 2**"+strconv.Itoa(i)+"bits", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + BitReverse(pol[:1< (1 << 15) { + size = 1 << 15 + } + paddedSize := ecc.NextPowerOfTwo(uint64(size)) + p1 := make([]fr.Element, paddedSize) + p2 := make([]fr.Element, paddedSize) + for i := 0; i < len(p1); i++ { + p1[i].SetRawBytes(r) + } + copy(p2, p1) + + // fft domain + nbCosets := uint64(uint8(data[0]) % 3) + domainWithPrecompute := NewDomain(paddedSize, nbCosets, true) + domainWOPrecompute := NewDomain(paddedSize, nbCosets, false) + + // bitReverse(DIF FFT(DIT FFT (bitReverse))))==id + for i := uint64(0); i < nbCosets; i++ { + BitReverse(p1) + domainWithPrecompute.FFT(p1, DIT, i) + domainWOPrecompute.FFTInverse(p1, DIF, i) + BitReverse(p1) + + for i := 0; i < len(p1); i++ { + if !p1[i].Equal(&p2[i]) { + panic(fmt.Sprintf("bitReverse(DIF FFT(DIT FFT (bitReverse)))) != id, size %d", size)) + } + } + } + + return fuzzNormal +} diff --git a/ecc/bls12-378/fr/fft/fuzz_test.go b/ecc/bls12-378/fr/fft/fuzz_test.go new file mode 100644 index 000000000..9890547c0 --- /dev/null +++ b/ecc/bls12-378/fr/fft/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fft + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bls12-378/fr/kzg/doc.go b/ecc/bls12-378/fr/kzg/doc.go new file mode 100644 index 000000000..d8a77e8f6 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package kzg provides a KZG commitment scheme. +package kzg diff --git a/ecc/bls12-378/fr/kzg/fuzz.go b/ecc/bls12-378/fr/kzg/fuzz.go new file mode 100644 index 000000000..0418cc962 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/fuzz.go @@ -0,0 +1,84 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + if len(data) == 0 { + return fuzzDiscard + } + size := int(uint8(data[0])) + 2 // TODO fix min size in NewScheme + if size > (1 << 15) { + size = 1 << 15 + } + r := bytes.NewReader(data[1:]) + var alpha, point fr.Element + alpha.SetRawBytes(r) + point.SetRawBytes(r) + s := NewScheme(size, alpha) + + // create polynomials + f := make([]polynomial.Polynomial, size/2) + for i := 0; i < len(f); i++ { + f[i] = make(polynomial.Polynomial, size) + for j := 0; j < len(f[i]); j++ { + f[i][j].SetRawBytes(r) + } + } + + // commit the polynomials + digests := make([]Digest, size/2) + for i := 0; i < len(digests); i++ { + digests[i], _ = s.Commit(f[i]) + + } + + proof, err := s.BatchOpenSinglePoint(&point, digests, f) 
+ if err != nil { + panic(err) + } + + // verify the claimed values + for i := 0; i < len(f); i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + panic("inconsistant claimed values") + } + } + + // verify correct proof + err = s.BatchVerifySinglePoint(digests, &proof) + if err != nil { + panic(err) + } + + return fuzzNormal +} diff --git a/ecc/bls12-378/fr/kzg/fuzz_test.go b/ecc/bls12-378/fr/kzg/fuzz_test.go new file mode 100644 index 000000000..8379a59c7 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bls12-378/fr/kzg/kzg.go b/ecc/bls12-378/fr/kzg/kzg.go new file mode 100644 index 000000000..9a42a84d2 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/kzg.go @@ -0,0 +1,518 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "errors" + "hash" + "math/big" + "sync" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" + "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrInvalidNbDigests = errors.New("number of digests is not the same as the number of polynomials") + ErrInvalidPolynomialSize = errors.New("invalid polynomial size (larger than SRS or == 0)") + ErrVerifyOpeningProof = errors.New("can't verify opening proof") + ErrVerifyBatchOpeningSinglePoint = errors.New("can't verify batch opening proof at single point") + ErrMinSRSSize = errors.New("minimum srs size is 2") +) + +// Digest commitment of a polynomial. +type Digest = bls12378.G1Affine + +// SRS stores the result of the MPC +type SRS struct { + G1 []bls12378.G1Affine // [gen [alpha]gen , [alpha**2]gen, ... ] + G2 [2]bls12378.G2Affine // [gen, [alpha]gen ] +} + +// NewSRS returns a new SRS using alpha as randomness source +// +// In production, a SRS generated through MPC should be used. 
+// +// implements io.ReaderFrom and io.WriterTo +func NewSRS(size uint64, bAlpha *big.Int) (*SRS, error) { + if size < 2 { + return nil, ErrMinSRSSize + } + var srs SRS + srs.G1 = make([]bls12378.G1Affine, size) + + var alpha fr.Element + alpha.SetBigInt(bAlpha) + + _, _, gen1Aff, gen2Aff := bls12378.Generators() + srs.G1[0] = gen1Aff + srs.G2[0] = gen2Aff + srs.G2[1].ScalarMultiplication(&gen2Aff, bAlpha) + + alphas := make([]fr.Element, size-1) + alphas[0] = alpha + for i := 1; i < len(alphas); i++ { + alphas[i].Mul(&alphas[i-1], &alpha) + } + for i := 0; i < len(alphas); i++ { + alphas[i].FromMont() + } + g1s := bls12378.BatchScalarMultiplicationG1(&gen1Aff, alphas) + copy(srs.G1[1:], g1s) + + return &srs, nil +} + +// OpeningProof KZG proof for opening at a single point. +// +// implements io.ReaderFrom and io.WriterTo +type OpeningProof struct { + // H quotient polynomial (f - f(z))/(x-z) + H bls12378.G1Affine + + // Point at which the polynomial is evaluated + Point fr.Element + + // ClaimedValue purported value + ClaimedValue fr.Element +} + +// BatchOpeningProof opening proof for many polynomials at the same point +// +// implements io.ReaderFrom and io.WriterTo +type BatchOpeningProof struct { + // H quotient polynomial Sum_i gamma**i*(f - f(z))/(x-z) + H bls12378.G1Affine + + // Point at which the polynomials are evaluated + Point fr.Element + + // ClaimedValues purported values + ClaimedValues []fr.Element +} + +// Commit commits to a polynomial using a multi exponentiation with the SRS. +// It is assumed that the polynomial is in canonical form, in Montgomery form. 
+func Commit(p polynomial.Polynomial, srs *SRS, nbTasks ...int) (Digest, error) { + + if len(p) == 0 || len(p) > len(srs.G1) { + return Digest{}, ErrInvalidPolynomialSize + } + + var res bls12378.G1Affine + + config := ecc.MultiExpConfig{ScalarsMont: true} + if len(nbTasks) > 0 { + config.NbTasks = nbTasks[0] + } + if _, err := res.MultiExp(srs.G1[:len(p)], p, config); err != nil { + return Digest{}, err + } + + return res, nil +} + +// Open computes an opening proof of polynomial p at given point. +// fft.Domain Cardinality must be larger than p.Degree() +func Open(p polynomial.Polynomial, point *fr.Element, domain *fft.Domain, srs *SRS) (OpeningProof, error) { + if len(p) == 0 || len(p) > len(srs.G1) { + return OpeningProof{}, ErrInvalidPolynomialSize + } + + // build the proof + res := OpeningProof{ + Point: *point, + ClaimedValue: p.Eval(point), + } + + // compute H + _p := make(polynomial.Polynomial, len(p)) + copy(_p, p) + h := dividePolyByXminusA(_p, res.ClaimedValue, res.Point) + + _p = nil // h re-use this memory + + // commit to H + hCommit, err := Commit(h, srs) + if err != nil { + return OpeningProof{}, err + } + res.H.Set(&hCommit) + + return res, nil +} + +// Verify verifies a KZG opening proof at a single point +func Verify(commitment *Digest, proof *OpeningProof, srs *SRS) error { + + // comm(f(a)) + var claimedValueG1Aff bls12378.G1Affine + var claimedValueBigInt big.Int + proof.ClaimedValue.ToBigIntRegular(&claimedValueBigInt) + claimedValueG1Aff.ScalarMultiplication(&srs.G1[0], &claimedValueBigInt) + + // [f(alpha) - f(a)]G1Jac + var fminusfaG1Jac, tmpG1Jac bls12378.G1Jac + fminusfaG1Jac.FromAffine(commitment) + tmpG1Jac.FromAffine(&claimedValueG1Aff) + fminusfaG1Jac.SubAssign(&tmpG1Jac) + + // [-H(alpha)]G1Aff + var negH bls12378.G1Affine + negH.Neg(&proof.H) + + // [alpha-a]G2Jac + var alphaMinusaG2Jac, genG2Jac, alphaG2Jac bls12378.G2Jac + var pointBigInt big.Int + proof.Point.ToBigIntRegular(&pointBigInt) + genG2Jac.FromAffine(&srs.G2[0]) + 
alphaG2Jac.FromAffine(&srs.G2[1]) + alphaMinusaG2Jac.ScalarMultiplication(&genG2Jac, &pointBigInt). + Neg(&alphaMinusaG2Jac). + AddAssign(&alphaG2Jac) + + // [alpha-a]G2Aff + var xminusaG2Aff bls12378.G2Affine + xminusaG2Aff.FromJacobian(&alphaMinusaG2Jac) + + // [f(alpha) - f(a)]G1Aff + var fminusfaG1Aff bls12378.G1Affine + fminusfaG1Aff.FromJacobian(&fminusfaG1Jac) + + // e([-H(alpha)]G1Aff, G2gen).e([-H(alpha)]G1Aff, [alpha-a]G2Aff) ==? 1 + check, err := bls12378.PairingCheck( + []bls12378.G1Affine{fminusfaG1Aff, negH}, + []bls12378.G2Affine{srs.G2[0], xminusaG2Aff}, + ) + if err != nil { + return err + } + if !check { + return ErrVerifyOpeningProof + } + return nil +} + +// BatchOpenSinglePoint creates a batch opening proof at _val of a list of polynomials. +// It's an interactive protocol, made non interactive using Fiat Shamir. +// point is the point at which the polynomials are opened. +// digests is the list of committed polynomials to open, need to derive the challenge using Fiat Shamir. +// polynomials is the list of polynomials to open. 
+func BatchOpenSinglePoint(polynomials []polynomial.Polynomial, digests []Digest, point *fr.Element, hf hash.Hash, domain *fft.Domain, srs *SRS) (BatchOpeningProof, error) { + + // check for invalid sizes + nbDigests := len(digests) + if nbDigests != len(polynomials) { + return BatchOpeningProof{}, ErrInvalidNbDigests + } + largestPoly := -1 + for _, p := range polynomials { + if len(p) == 0 || len(p) > len(srs.G1) { + return BatchOpeningProof{}, ErrInvalidPolynomialSize + } + if len(p) > largestPoly { + largestPoly = len(p) + } + } + + var res BatchOpeningProof + + // compute the purported values + res.ClaimedValues = make([]fr.Element, len(polynomials)) + var wg sync.WaitGroup + wg.Add(len(polynomials)) + for i := 0; i < len(polynomials); i++ { + go func(at int) { + res.ClaimedValues[at] = polynomials[at].Eval(point) + wg.Done() + }(i) + } + + // set the point at which the evaluation is done + res.Point = *point + + // derive the challenge gamma, binded to the point and the commitments + gamma, err := deriveGamma(res.Point, digests, hf) + if err != nil { + return BatchOpeningProof{}, err + } + + // compute sum_i gamma**i*f(a) + var sumGammaiTimesEval fr.Element + chSumGammai := make(chan struct{}, 1) + go func() { + // wait for polynomial evaluations to be completed (res.ClaimedValues) + wg.Wait() + sumGammaiTimesEval = res.ClaimedValues[nbDigests-1] + for i := nbDigests - 2; i >= 0; i-- { + sumGammaiTimesEval.Mul(&sumGammaiTimesEval, &gamma). + Add(&sumGammaiTimesEval, &res.ClaimedValues[i]) + } + close(chSumGammai) + }() + + // compute sum_i gamma**i*f + // that is p0 + gamma * p1 + gamma^2 * p2 + ... 
gamma^n * pn + // note: if we are willing to paralellize that, we could clone the poly and scale them by + // gamma n in parallel, before reducing into sumGammaiTimesPol + sumGammaiTimesPol := make(polynomial.Polynomial, largestPoly) + copy(sumGammaiTimesPol, polynomials[0]) + gammaN := gamma + var pj fr.Element + for i := 1; i < len(polynomials); i++ { + for j := 0; j < len(polynomials[i]); j++ { + pj.Mul(&polynomials[i][j], &gammaN) + sumGammaiTimesPol[j].Add(&sumGammaiTimesPol[j], &pj) + } + gammaN.Mul(&gammaN, &gamma) + } + + // compute H + <-chSumGammai + h := dividePolyByXminusA(sumGammaiTimesPol, sumGammaiTimesEval, res.Point) + sumGammaiTimesPol = nil // same memory as h + + res.H, err = Commit(h, srs) + if err != nil { + return BatchOpeningProof{}, err + } + + return res, nil +} + +// FoldProof fold the digests and the proofs in batchOpeningProof using Fiat Shamir +// to obtain an opening proof at a single point. +// +// * digests list of digests on which batchOpeningProof is based +// * batchOpeningProof opening proof of digests +// * returns the folded version of batchOpeningProof, Digest, the folded version of digests +func FoldProof(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash) (OpeningProof, Digest, error) { + + nbDigests := len(digests) + + // check consistancy between numbers of claims vs number of digests + if nbDigests != len(batchOpeningProof.ClaimedValues) { + return OpeningProof{}, Digest{}, ErrInvalidNbDigests + } + + // derive the challenge gamma, binded to the point and the commitments + gamma, err := deriveGamma(batchOpeningProof.Point, digests, hf) + if err != nil { + return OpeningProof{}, Digest{}, ErrInvalidNbDigests + } + + // fold the claimed values and digests + gammai := make([]fr.Element, nbDigests) + gammai[0].SetOne() + for i := 1; i < nbDigests; i++ { + gammai[i].Mul(&gammai[i-1], &gamma) + } + foldedDigests, foldedEvaluations, err := fold(digests, batchOpeningProof.ClaimedValues, gammai) + if err != nil 
{ + return OpeningProof{}, Digest{}, err + } + + // create the folded opening proof + var res OpeningProof + res.ClaimedValue.Set(&foldedEvaluations) + res.H.Set(&batchOpeningProof.H) + res.Point.Set(&batchOpeningProof.Point) + + return res, foldedDigests, nil +} + +// BatchVerifySinglePoint verifies a batched opening proof at a single point of a list of polynomials. +// +// * digests list of digests on which opening proof is done +// * batchOpeningProof proof of correct opening on the digests +func BatchVerifySinglePoint(digests []Digest, batchOpeningProof *BatchOpeningProof, hf hash.Hash, srs *SRS) error { + + // fold the proof + foldedProof, foldedDigest, err := FoldProof(digests, batchOpeningProof, hf) + if err != nil { + return err + } + + // verify the foldedProof againts the foldedDigest + err = Verify(&foldedDigest, &foldedProof, srs) + return err + +} + +// BatchVerifyMultiPoints batch verifies a list of opening proofs at different points. +// The purpose of the batching is to have only one pairing for verifying several proofs. 
+// +// * digests list of committed polynomials which are opened +// * proofs list of opening proofs of the digest +func BatchVerifyMultiPoints(digests []Digest, proofs []OpeningProof, srs *SRS) error { + + // check consistancy nb proogs vs nb digests + if len(digests) != len(proofs) { + return ErrInvalidNbDigests + } + + // if only one digest, call Verify + if len(digests) == 1 { + return Verify(&digests[0], &proofs[0], srs) + } + + // sample random numbers for sampling + randomNumbers := make([]fr.Element, len(digests)) + randomNumbers[0].SetOne() + for i := 1; i < len(randomNumbers); i++ { + _, err := randomNumbers[i].SetRandom() + if err != nil { + return err + } + } + + // combine random_i*quotient_i + var foldedQuotients bls12378.G1Affine + quotients := make([]bls12378.G1Affine, len(proofs)) + for i := 0; i < len(randomNumbers); i++ { + quotients[i].Set(&proofs[i].H) + } + config := ecc.MultiExpConfig{ScalarsMont: true} + _, err := foldedQuotients.MultiExp(quotients, randomNumbers, config) + if err != nil { + return nil + } + + // fold digests and evals + evals := make([]fr.Element, len(digests)) + for i := 0; i < len(randomNumbers); i++ { + evals[i].Set(&proofs[i].ClaimedValue) + } + foldedDigests, foldedEvals, err := fold(digests, evals, randomNumbers) + if err != nil { + return err + } + + // compute commitment to folded Eval + var foldedEvalsCommit bls12378.G1Affine + var foldedEvalsBigInt big.Int + foldedEvals.ToBigIntRegular(&foldedEvalsBigInt) + foldedEvalsCommit.ScalarMultiplication(&srs.G1[0], &foldedEvalsBigInt) + + // compute F = foldedDigests - foldedEvalsCommit + foldedDigests.Sub(&foldedDigests, &foldedEvalsCommit) + + // combine random_i*(point_i*quotient_i) + var foldedPointsQuotients bls12378.G1Affine + for i := 0; i < len(randomNumbers); i++ { + randomNumbers[i].Mul(&randomNumbers[i], &proofs[i].Point) + } + _, err = foldedPointsQuotients.MultiExp(quotients, randomNumbers, config) + if err != nil { + return err + } + + // lhs first pairing + 
foldedDigests.Add(&foldedDigests, &foldedPointsQuotients) + + // lhs second pairing + foldedQuotients.Neg(&foldedQuotients) + + // pairing check + check, err := bls12378.PairingCheck( + []bls12378.G1Affine{foldedDigests, foldedQuotients}, + []bls12378.G2Affine{srs.G2[0], srs.G2[1]}, + ) + if err != nil { + return err + } + if !check { + return ErrVerifyOpeningProof + } + return nil + +} + +// fold folds digests and evaluations using the list of factors as random numbers. +// +// * digests list of digests to fold +// * evaluations list of evaluations to fold +// * factors list of multiplicative factors used for the folding (in Montgomery form) +func fold(digests []Digest, evaluations []fr.Element, factors []fr.Element) (Digest, fr.Element, error) { + + // length inconsistancy between digests and evaluations should have been done before calling this function + nbDigests := len(digests) + + // fold the claimed values + var foldedEvaluations, tmp fr.Element + for i := 0; i < nbDigests; i++ { + tmp.Mul(&evaluations[i], &factors[i]) + foldedEvaluations.Add(&foldedEvaluations, &tmp) + } + + // fold the digests + var foldedDigests Digest + _, err := foldedDigests.MultiExp(digests, factors, ecc.MultiExpConfig{ScalarsMont: true}) + if err != nil { + return foldedDigests, foldedEvaluations, err + } + + // folding done + return foldedDigests, foldedEvaluations, nil + +} + +// deriveGamma derives a challenge using Fiat Shamir to fold proofs. 
+func deriveGamma(point fr.Element, digests []Digest, hf hash.Hash) (fr.Element, error) { + + // derive the challenge gamma, binded to the point and the commitments + fs := fiatshamir.NewTranscript(hf, "gamma") + if err := fs.Bind("gamma", point.Marshal()); err != nil { + return fr.Element{}, err + } + for i := 0; i < len(digests); i++ { + if err := fs.Bind("gamma", digests[i].Marshal()); err != nil { + return fr.Element{}, err + } + } + gammaByte, err := fs.ComputeChallenge("gamma") + if err != nil { + return fr.Element{}, err + } + var gamma fr.Element + gamma.SetBytes(gammaByte) + + return gamma, nil +} + +// dividePolyByXminusA computes (f-f(a))/(x-a), in canonical basis, in regular form +// f memory is re-used for the result +func dividePolyByXminusA(f polynomial.Polynomial, fa, a fr.Element) polynomial.Polynomial { + + // first we compute f-f(a) + f[0].Sub(&f[0], &fa) + + // now we use syntetic division to divide by x-a + var t fr.Element + for i := len(f) - 2; i >= 0; i-- { + t.Mul(&f[i+1], &a) + + f[i].Add(&f[i], &t) + } + + // the result is of degree deg(f)-1 + return f[1:] +} diff --git a/ecc/bls12-378/fr/kzg/kzg_test.go b/ecc/bls12-378/fr/kzg/kzg_test.go new file mode 100644 index 000000000..837f2c305 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/kzg_test.go @@ -0,0 +1,453 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "bytes" + "crypto/sha256" + "math/big" + "reflect" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" +) + +// testSRS re-used accross tests of the KZG scheme +var testSRS *SRS + +func init() { + const srsSize = 230 + testSRS, _ = NewSRS(ecc.NextPowerOfTwo(srsSize), new(big.Int).SetInt64(42)) +} + +func TestDividePolyByXminusA(t *testing.T) { + + const pSize = 230 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + + // evaluate the polynomial at a random point + var point fr.Element + point.SetRandom() + evaluation := pol.Eval(&point) + + // probabilistic test (using Schwartz Zippel lemma, evaluation at one point is enough) + var randPoint, xminusa fr.Element + randPoint.SetRandom() + polRandpoint := pol.Eval(&randPoint) + polRandpoint.Sub(&polRandpoint, &evaluation) // f(rand)-f(point) + + // compute f-f(a)/x-a + h := dividePolyByXminusA(pol, evaluation, point) + pol = nil // h reuses this memory + + if len(h) != 229 { + t.Fatal("inconsistant size of quotient") + } + + hRandPoint := h.Eval(&randPoint) + xminusa.Sub(&randPoint, &point) // rand-point + + // f(rand)-f(point) ==? h(rand)*(rand-point) + hRandPoint.Mul(&hRandPoint, &xminusa) + + if !hRandPoint.Equal(&polRandpoint) { + t.Fatal("Error f-f(a)/x-a") + } +} + +func TestSerializationSRS(t *testing.T) { + + // create a SRS + srs, err := NewSRS(64, new(big.Int).SetInt64(42)) + if err != nil { + t.Fatal(err) + } + + // serialize it... 
+ var buf bytes.Buffer + _, err = srs.WriteTo(&buf) + if err != nil { + t.Fatal(err) + } + + // reconstruct the SRS + var _srs SRS + _, err = _srs.ReadFrom(&buf) + if err != nil { + t.Fatal(err) + } + + // compare + if !reflect.DeepEqual(srs, &_srs) { + t.Fatal("scheme serialization failed") + } + +} + +func TestCommit(t *testing.T) { + + // create a polynomial + f := make(polynomial.Polynomial, 60) + for i := 0; i < 60; i++ { + f[i].SetRandom() + } + + // commit using the method from KZG + _kzgCommit, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + var kzgCommit bls12378.G1Affine + kzgCommit.Unmarshal(_kzgCommit.Marshal()) + + // check commitment using manual commit + var x fr.Element + x.SetString("42") + fx := f.Eval(&x) + var fxbi big.Int + fx.ToBigIntRegular(&fxbi) + var manualCommit bls12378.G1Affine + manualCommit.Set(&testSRS.G1[0]) + manualCommit.ScalarMultiplication(&manualCommit, &fxbi) + + // compare both results + if !kzgCommit.Equal(&manualCommit) { + t.Fatal("error KZG commitment") + } + +} + +func TestVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create a polynomial + f := randomPolynomial(60) + + // commit the polynomial + digest, err := Commit(f, testSRS) + if err != nil { + t.Fatal(err) + } + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := Open(f, &point, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed valued + expected := f.Eval(&point) + if !proof.ClaimedValue.Equal(&expected) { + t.Fatal("inconsistant claimed value") + } + + // verify correct proof + err = Verify(&digest, &proof, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValue.Double(&proof.ClaimedValue) + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } +} + +func TestBatchVerifySinglePoint(t *testing.T) { + + domain := fft.NewDomain(64, 
0, false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + + } + + // pick a hash function + hf := sha256.New() + + // compute opening proof at a random point + var point fr.Element + point.SetString("4321") + proof, err := BatchOpenSinglePoint(f, digests, &point, hf, domain, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify the claimed values + for i := 0; i < 10; i++ { + expectedClaim := f[i].Eval(&point) + if !expectedClaim.Equal(&proof.ClaimedValues[i]) { + t.Fatal("inconsistant claimed values") + } + } + + // verify correct proof + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err != nil { + t.Fatal(err) + } + + // verify wrong proof + proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + +} + +func TestBatchVerifyMultiPoints(t *testing.T) { + + domain := fft.NewDomain(64, 0, false) + + // create polynomials + f := make([]polynomial.Polynomial, 10) + for i := 0; i < 10; i++ { + f[i] = randomPolynomial(40 + 2*i) + } + + // commit the polynomials + digests := make([]Digest, 10) + for i := 0; i < 10; i++ { + digests[i], _ = Commit(f[i], testSRS) + } + + // pick a hash function + hf := sha256.New() + + // compute 2 batch opening proofs at 2 random points + points := make([]fr.Element, 2) + batchProofs := make([]BatchOpeningProof, 2) + points[0].SetRandom() + batchProofs[0], _ = BatchOpenSinglePoint(f[:5], digests[:5], &points[0], hf, domain, testSRS) + points[1].SetRandom() + batchProofs[1], _ = BatchOpenSinglePoint(f[5:], digests[5:], &points[1], hf, domain, testSRS) + + // fold the 2 batch opening proofs + proofs := make([]OpeningProof, 2) + foldedDigests := make([]Digest, 2) + 
proofs[0], foldedDigests[0], _ = FoldProof(digests[:5], &batchProofs[0], hf) + proofs[1], foldedDigests[1], _ = FoldProof(digests[5:], &batchProofs[1], hf) + + // check the the individual batch proofs are correct + err := Verify(&foldedDigests[0], &proofs[0], testSRS) + if err != nil { + t.Fatal(err) + } + err = Verify(&foldedDigests[1], &proofs[1], testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify correct folded proofs + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err != nil { + t.Fatal(err) + } + + // batch verify tampered folded proofs + proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } + +} + +const benchSize = 1 << 16 + +func BenchmarkKZGCommit(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // random polynomial + p := randomPolynomial(benchSize / 2) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Commit(p, benchSRS) + } +} + +func BenchmarkDivideByXMinusA(b *testing.B) { + const pSize = 1 << 22 + + // build random polynomial + pol := make(polynomial.Polynomial, pSize) + pol[0].SetRandom() + for i := 1; i < pSize; i++ { + pol[i] = pol[i-1] + } + var a, fa fr.Element + a.SetRandom() + fa.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dividePolyByXminusA(pol, fa, a) + pol = pol[:pSize] + pol[pSize-1] = pol[0] + } +} + +func BenchmarkKZGOpen(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Open(p, &r, domain, benchSRS) + } +} + +func BenchmarkKZGVerify(b *testing.B) { + benchSRS, err := 
NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + // kzg scheme + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // random polynomial + p := randomPolynomial(benchSize / 2) + var r fr.Element + r.SetRandom() + + // commit + comm, err := Commit(p, benchSRS) + if err != nil { + b.Fatal(err) + } + + // open + openingProof, err := Open(p, &r, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Verify(&comm, &openingProof, benchSRS) + } +} + +func BenchmarkKZGBatchOpen10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + } +} + +func BenchmarkKZGBatchVerify10(b *testing.B) { + benchSRS, err := NewSRS(ecc.NextPowerOfTwo(benchSize), new(big.Int).SetInt64(42)) + if err != nil { + b.Fatal(err) + } + domain := fft.NewDomain(uint64(benchSize), 0, false) + + // 10 random polynomials + var ps [10]polynomial.Polynomial + for i := 0; i < 10; i++ { + ps[i] = randomPolynomial(benchSize / 2) + } + + // commitments + var commitments [10]Digest + for i := 0; i < 10; i++ { + commitments[i], _ = Commit(ps[i], benchSRS) + } + + // pick a hash function + hf := sha256.New() + + var r fr.Element + r.SetRandom() + + proof, err := BatchOpenSinglePoint(ps[:], commitments[:], &r, hf, domain, benchSRS) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; 
i++ { + BatchVerifySinglePoint(commitments[:], &proof, hf, benchSRS) + } +} + +func randomPolynomial(size int) polynomial.Polynomial { + f := make(polynomial.Polynomial, size) + for i := 0; i < size; i++ { + f[i].SetRandom() + } + return f +} diff --git a/ecc/bls12-378/fr/kzg/marshal.go b/ecc/bls12-378/fr/kzg/marshal.go new file mode 100644 index 000000000..9805f2fb1 --- /dev/null +++ b/ecc/bls12-378/fr/kzg/marshal.go @@ -0,0 +1,138 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package kzg + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378" + "io" +) + +// WriteTo writes binary encoding of the SRS +func (srs *SRS) WriteTo(w io.Writer) (int64, error) { + // encode the SRS + enc := bls12378.NewEncoder(w) + + toEncode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + srs.G1, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes SRS data from reader. 
+func (srs *SRS) ReadFrom(r io.Reader) (int64, error) { + // decode the SRS + dec := bls12378.NewDecoder(r) + + toDecode := []interface{}{ + &srs.G2[0], + &srs.G2[1], + &srs.G1, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a OpeningProof +func (proof *OpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bls12378.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes OpeningProof data from reader. +func (proof *OpeningProof) ReadFrom(r io.Reader) (int64, error) { + dec := bls12378.NewDecoder(r) + + toDecode := []interface{}{ + &proof.H, + &proof.Point, + &proof.ClaimedValue, + } + + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + return dec.BytesRead(), err + } + } + + return dec.BytesRead(), nil +} + +// WriteTo writes binary encoding of a BatchOpeningProof +func (proof *BatchOpeningProof) WriteTo(w io.Writer) (int64, error) { + enc := bls12378.NewEncoder(w) + + toEncode := []interface{}{ + &proof.H, + &proof.Point, + proof.ClaimedValues, + } + + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + return enc.BytesWritten(), err + } + } + + return enc.BytesWritten(), nil +} + +// ReadFrom decodes BatchOpeningProof data from reader. 
+// ReadFrom decodes BatchOpeningProof data from reader.
+// It returns the number of bytes read and the first decoding error, if any.
+func (proof *BatchOpeningProof) ReadFrom(r io.Reader) (int64, error) {
+	dec := bls12378.NewDecoder(r)
+
+	// fields are decoded in the exact order WriteTo encodes them:
+	// H, Point, then the slice of claimed values
+	toDecode := []interface{}{
+		&proof.H,
+		&proof.Point,
+		&proof.ClaimedValues,
+	}
+
+	for _, v := range toDecode {
+		if err := dec.Decode(v); err != nil {
+			// report how many bytes were consumed before the failure
+			return dec.BytesRead(), err
+		}
+	}
+
+	return dec.BytesRead(), nil
+}
diff --git a/ecc/bls12-378/fr/mimc/doc.go b/ecc/bls12-378/fr/mimc/doc.go
new file mode 100644
index 000000000..497bd40a9
--- /dev/null
+++ b/ecc/bls12-378/fr/mimc/doc.go
@@ -0,0 +1,18 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+// Package mimc provides MiMC hash function using Miyaguchi–Preneel construction.
+package mimc
diff --git a/ecc/bls12-378/fr/mimc/fuzz.go b/ecc/bls12-378/fr/mimc/fuzz.go
new file mode 100644
index 000000000..41b557cf3
--- /dev/null
+++ b/ecc/bls12-378/fr/mimc/fuzz.go
@@ -0,0 +1,34 @@
+//go:build gofuzz
+// +build gofuzz
+
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+package mimc
+
+// return codes understood by the go-fuzz driver
+const (
+	fuzzInteresting = 1
+	fuzzNormal      = 0
+	fuzzDiscard     = -1
+)
+
+// Fuzz is the go-fuzz entry point: it seeds a MiMC instance with the fuzzed
+// data, hashes that same data and discards the digest (only the code path is
+// exercised; the output value is not checked).
+func Fuzz(data []byte) int {
+	var s []byte
+	h := NewMiMC(string(data))
+	h.Write(data)
+	h.Sum(s)
+	return fuzzNormal
+}
diff --git a/ecc/bls12-378/fr/mimc/mimc.go b/ecc/bls12-378/fr/mimc/mimc.go
new file mode 100644
index 000000000..b149eb88e
--- /dev/null
+++ b/ecc/bls12-378/fr/mimc/mimc.go
@@ -0,0 +1,174 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package mimc + +import ( + "hash" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "golang.org/x/crypto/sha3" +) + +const mimcNbRounds = 91 + +// BlockSize size that mimc consumes +const BlockSize = fr.Bytes + +// Params constants for the mimc hash function +type Params []fr.Element + +// NewParams creates new mimc object +func NewParams(seed string) Params { + + // set the constants + res := make(Params, mimcNbRounds) + + rnd := sha3.Sum256([]byte(seed)) + value := new(big.Int).SetBytes(rnd[:]) + + for i := 0; i < mimcNbRounds; i++ { + rnd = sha3.Sum256(value.Bytes()) + value.SetBytes(rnd[:]) + res[i].SetBigInt(value) + } + + return res +} + +// digest represents the partial evaluation of the checksum +// along with the params of the mimc function +type digest struct { + Params Params + h fr.Element + data []byte // data to hash +} + +// NewMiMC returns a MiMCImpl object, pure-go reference implementation +func NewMiMC(seed string) hash.Hash { + d := new(digest) + params := NewParams(seed) + //d.Reset() + d.Params = params + d.Reset() + return d +} + +// Reset resets the Hash to its initial state. +func (d *digest) Reset() { + d.data = nil + d.h = fr.Element{0, 0, 0, 0} +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (d *digest) Sum(b []byte) []byte { + buffer := d.checksum() + d.data = nil // flush the data already hashed + hash := buffer.Bytes() + b = append(b, hash[:]...) + return b +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (d *digest) Size() int { + return BlockSize +} + +// BlockSize returns the number of bytes Sum will return. 
+func (d *digest) BlockSize() int { + return BlockSize +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// It never returns an error. +func (d *digest) Write(p []byte) (n int, err error) { + n = len(p) + d.data = append(d.data, p...) + return +} + +// Hash hash using Miyaguchi–Preneel: +// https://en.wikipedia.org/wiki/One-way_compression_function +// The XOR operation is replaced by field addition, data is in Montgomery form +func (d *digest) checksum() fr.Element { + + var buffer [BlockSize]byte + var x fr.Element + + // if data size is not multiple of BlockSizes we padd: + // .. || 0xaf8 -> .. || 0x0000...0af8 + if len(d.data)%BlockSize != 0 { + q := len(d.data) / BlockSize + r := len(d.data) % BlockSize + sliceq := make([]byte, q*BlockSize) + copy(sliceq, d.data) + slicer := make([]byte, r) + copy(slicer, d.data[q*BlockSize:]) + sliceremainder := make([]byte, BlockSize-r) + d.data = append(sliceq, sliceremainder...) + d.data = append(d.data, slicer...) + } + + if len(d.data) == 0 { + d.data = make([]byte, 32) + } + + nbChunks := len(d.data) / BlockSize + + for i := 0; i < nbChunks; i++ { + copy(buffer[:], d.data[i*BlockSize:(i+1)*BlockSize]) + x.SetBytes(buffer[:]) + d.encrypt(x) + d.h.Add(&x, &d.h) + } + + return d.h +} + +// plain execution of a mimc run +// m: message +// k: encryption key +func (d *digest) encrypt(m fr.Element) { + + for i := 0; i < len(d.Params); i++ { + // m = (m+k+c)^5 + var tmp fr.Element + tmp.Add(&m, &d.h).Add(&tmp, &d.Params[i]) + m.Square(&tmp). + Square(&m). 
+ Mul(&m, &tmp) + } + m.Add(&m, &d.h) + d.h = m +} + +// Sum computes the mimc hash of msg from seed +func Sum(seed string, msg []byte) ([]byte, error) { + params := NewParams(seed) + var d digest + d.Params = params + if _, err := d.Write(msg); err != nil { + return nil, err + } + h := d.checksum() + bytes := h.Bytes() + return bytes[:], nil +} diff --git a/ecc/bls12-378/fr/permutation/doc.go b/ecc/bls12-378/fr/permutation/doc.go new file mode 100644 index 000000000..bdf98e6ca --- /dev/null +++ b/ecc/bls12-378/fr/permutation/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package permutation provides an API to build permutation proofs. +package permutation diff --git a/ecc/bls12-378/fr/permutation/permutation.go b/ecc/bls12-378/fr/permutation/permutation.go new file mode 100644 index 000000000..a51f4ffd1 --- /dev/null +++ b/ecc/bls12-378/fr/permutation/permutation.go @@ -0,0 +1,361 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+package permutation
+
+import (
+	"crypto/sha256"
+	"errors"
+	"math/big"
+	"math/bits"
+
+	"github.com/consensys/gnark-crypto/ecc/bls12-378"
+	"github.com/consensys/gnark-crypto/ecc/bls12-378/fr"
+	"github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft"
+	"github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg"
+	"github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial"
+	fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir"
+)
+
+// sentinel errors returned by Prove and Verify
+var (
+	ErrIncompatibleSize = errors.New("t1 and t2 should be of the same size")
+	ErrSize             = errors.New("t1 and t2 should be of size a power of 2")
+	ErrPermutationProof = errors.New("permutation proof verification failed")
+)
+
+// Proof proof that the commitments of t1 and t2 come from
+// the same vector but permuted.
+type Proof struct {
+
+	// size of the polynomials (cardinality of the FFT domain used by Prove)
+	size int
+
+	// commitments of t1 & t2, the permuted vectors, and z, the accumulation
+	// polynomial
+	t1, t2, z kzg.Digest
+
+	// commitment to the quotient polynomial
+	q kzg.Digest
+
+	// opening proofs of t1, t2, z, q (in that order)
+	batchedProof kzg.BatchOpeningProof
+
+	// shifted opening proof of z (opened at g*eta, where g is the domain generator)
+	shiftedProof kzg.OpeningProof
+}
+
+// computeZ returns the accumulation polynomial in Lagrange basis.
+func computeZ(lt1, lt2 []fr.Element, epsilon fr.Element) []fr.Element { + + s := len(lt1) + z := make([]fr.Element, s) + d := make([]fr.Element, s) + z[0].SetOne() + d[0].SetOne() + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + var t fr.Element + for i := 0; i < s-1; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + z[_ii].Mul(&z[_i], t.Sub(&epsilon, <1[i])) + d[i+1].Mul(&d[i], t.Sub(&epsilon, <2[i])) + } + d = fr.BatchInvert(d) + for i := 0; i < s-1; i++ { + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + z[_ii].Mul(&z[_ii], &d[i+1]) + } + + return z +} + +// computeH computes lt2*z(gx) - lt1*z +func computeH(lt1, lt2, lz []fr.Element, epsilon fr.Element) []fr.Element { + + s := len(lt1) + res := make([]fr.Element, s) + var a, b fr.Element + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + for i := 0; i < s; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + _ii := int(bits.Reverse64(uint64((i+1)%s)) >> nn) + a.Sub(&epsilon, <2[_i]) + a.Mul(&lz[_ii], &a) + b.Sub(&epsilon, <1[_i]) + b.Mul(&lz[_i], &b) + res[_i].Sub(&a, &b) + } + return res +} + +// computeH0 computes L0 * (z-1) +func computeH0(lz []fr.Element, d *fft.Domain) []fr.Element { + + var tn, o, g fr.Element + s := len(lz) + tn.SetUint64(2). + Neg(&tn) + u := make([]fr.Element, s) + o.SetOne() + g.Set(&d.FinerGenerator) + for i := 0; i < s; i++ { + u[i].Sub(&g, &o) + g.Mul(&g, &d.Generator) + } + u = fr.BatchInvert(u) + res := make([]fr.Element, s) + nn := uint64(64 - bits.TrailingZeros64(uint64(s))) + for i := 0; i < s; i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lz[_i], &o). + Mul(&res[_i], &u[i]). + Mul(&res[_i], &tn) + } + return res +} + +// Prove generates a proof that t1 and t2 are the same but permuted. +// The size of t1 and t2 should be the same and a power of 2. 
+func Prove(srs *kzg.SRS, t1, t2 []fr.Element) (Proof, error) {
+
+	// res
+	var proof Proof
+	var err error
+
+	// size checking
+	if len(t1) != len(t2) {
+		return proof, ErrIncompatibleSize
+	}
+
+	// create the domains (rejects sizes that are not a power of 2)
+	d := fft.NewDomain(uint64(len(t1)), 1, false)
+	if d.Cardinality != uint64(len(t1)) {
+		return proof, ErrSize
+	}
+	s := int(d.Cardinality)
+	proof.size = s
+
+	// hash function for Fiat Shamir
+	hFunc := sha256.New()
+
+	// transcript to derive the challenge
+	fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta")
+
+	// commit t1, t2 (interpolate: inverse FFT then bit-reverse back to
+	// canonical coefficient order)
+	ct1 := make([]fr.Element, s)
+	ct2 := make([]fr.Element, s)
+	copy(ct1, t1)
+	copy(ct2, t2)
+	d.FFTInverse(ct1, fft.DIF, 0)
+	d.FFTInverse(ct2, fft.DIF, 0)
+	fft.BitReverse(ct1)
+	fft.BitReverse(ct2)
+	proof.t1, err = kzg.Commit(ct1, srs)
+	if err != nil {
+		return proof, err
+	}
+	proof.t2, err = kzg.Commit(ct2, srs)
+	if err != nil {
+		return proof, err
+	}
+
+	// derive challenge for z
+	epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2)
+	if err != nil {
+		return proof, err
+	}
+
+	// compute Z and commit it
+	cz := computeZ(t1, t2, epsilon)
+	d.FFTInverse(cz, fft.DIT, 0)
+	proof.z, err = kzg.Commit(cz, srs)
+	if err != nil {
+		return proof, err
+	}
+	// lz: evaluations of z on the coset (cosetID 1)
+	lz := make([]fr.Element, s)
+	copy(lz, cz)
+	d.FFT(lz, fft.DIF, 1)
+
+	// compute the first part of the numerator
+	lt1 := make([]fr.Element, s)
+	lt2 := make([]fr.Element, s)
+	copy(lt1, ct1)
+	copy(lt2, ct2)
+	d.FFT(lt1, fft.DIF, 1)
+	d.FFT(lt2, fft.DIF, 1)
+	h := computeH(lt1, lt2, lz, epsilon)
+
+	// compute second part of the numerator
+	h0 := computeH0(lz, d)
+
+	// derive challenge used for the folding
+	omega, err := deriveRandomness(&fs, "omega", &proof.z)
+	if err != nil {
+		return proof, err
+	}
+
+	// fold the numerator and divide it by x^n-1
+	// t = (-2)^-1; presumably the inverse of the coset evaluation of
+	// x^n-1 — TODO confirm against the fft domain docs
+	var t fr.Element
+	t.SetUint64(2).Neg(&t).Inverse(&t)
+	for i := 0; i < s; i++ {
+		h0[i].Mul(&omega, &h0[i]).
+			Add(&h0[i], &h[i]).
+			Mul(&h0[i], &t)
+	}
+
+	// get the quotient and commit it
+	d.FFTInverse(h0, fft.DIT, 1)
+	proof.q, err = kzg.Commit(h0, srs)
+	if err != nil {
+		return proof, err
+	}
+
+	// derive the evaluation challenge
+	eta, err := deriveRandomness(&fs, "eta", &proof.q)
+	if err != nil {
+		return proof, err
+	}
+
+	// compute the opening proofs (order matters: t1, t2, z, q — Verify
+	// indexes ClaimedValues by this order)
+	proof.batchedProof, err = kzg.BatchOpenSinglePoint(
+		[]polynomial.Polynomial{
+			ct1,
+			ct2,
+			cz,
+			h0,
+		},
+		[]kzg.Digest{
+			proof.t1,
+			proof.t2,
+			proof.z,
+			proof.q,
+		},
+		&eta,
+		hFunc,
+		d,
+		srs,
+	)
+	if err != nil {
+		return proof, err
+	}
+
+	// open z at the shifted point g*eta
+	eta.Mul(&eta, &d.Generator)
+	proof.shiftedProof, err = kzg.Open(
+		cz,
+		&eta,
+		d,
+		srs,
+	)
+	if err != nil {
+		return proof, err
+	}
+
+	// done
+	return proof, nil
+
+}
+
+// TODO put that in fiat-shamir package
+// deriveRandomness binds the (uncompressed) points to the named challenge
+// and returns the resulting transcript challenge as a field element.
+func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bls12378.G1Affine) (fr.Element, error) {
+
+	var buf [bls12378.SizeOfG1AffineUncompressed]byte
+	var r fr.Element
+
+	for _, p := range points {
+		buf = p.RawBytes()
+		if err := fs.Bind(challenge, buf[:]); err != nil {
+			return r, err
+		}
+	}
+
+	b, err := fs.ComputeChallenge(challenge)
+	if err != nil {
+		return r, err
+	}
+	r.SetBytes(b)
+	return r, nil
+}
+
+// Verify verifies a permutation proof.
+func Verify(srs *kzg.SRS, proof Proof) error {
+
+	// hash function that is used for Fiat Shamir
+	hFunc := sha256.New()
+
+	// transcript to derive the challenge (must mirror Prove exactly)
+	fs := fiatshamir.NewTranscript(hFunc, "epsilon", "omega", "eta")
+
+	// derive the challenges
+	epsilon, err := deriveRandomness(&fs, "epsilon", &proof.t1, &proof.t2)
+	if err != nil {
+		return err
+	}
+
+	omega, err := deriveRandomness(&fs, "omega", &proof.z)
+	if err != nil {
+		return err
+	}
+
+	eta, err := deriveRandomness(&fs, "eta", &proof.q)
+	if err != nil {
+		return err
+	}
+
+	// check the relation at eta, using the claimed values in the order
+	// they were opened: [0]=t1, [1]=t2, [2]=z, [3]=q
+	bs := big.NewInt(int64(proof.size))
+	var l0, a, b, one, rhs, lhs fr.Element
+	one.SetOne()
+	// rhs = eta^n - 1 (vanishing polynomial at eta)
+	rhs.Exp(eta, bs).
+		Sub(&rhs, &one)
+	a.Sub(&eta, &one)
+	// l0 = (eta^n - 1)/(eta - 1)
+	l0.Div(&rhs, &a)
+	rhs.Mul(&rhs, &proof.batchedProof.ClaimedValues[3])
+	a.Sub(&epsilon, &proof.batchedProof.ClaimedValues[1]).
+		Mul(&a, &proof.shiftedProof.ClaimedValue)
+	b.Sub(&epsilon, &proof.batchedProof.ClaimedValues[0]).
+		Mul(&b, &proof.batchedProof.ClaimedValues[2])
+	lhs.Sub(&a, &b)
+	a.Sub(&proof.batchedProof.ClaimedValues[2], &one).
+		Mul(&a, &l0).
+		Mul(&a, &omega)
+	lhs.Add(&a, &lhs)
+	if !lhs.Equal(&rhs) {
+		return ErrPermutationProof
+	}
+
+	// check the opening proofs
+	err = kzg.BatchVerifySinglePoint(
+		[]kzg.Digest{
+			proof.t1,
+			proof.t2,
+			proof.z,
+			proof.q,
+		},
+		&proof.batchedProof,
+		hFunc,
+		srs,
+	)
+	if err != nil {
+		return err
+	}
+
+	err = kzg.Verify(&proof.z, &proof.shiftedProof, srs)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/ecc/bls12-378/fr/permutation/permutation_test.go b/ecc/bls12-378/fr/permutation/permutation_test.go
new file mode 100644
index 000000000..9519e05b8
--- /dev/null
+++ b/ecc/bls12-378/fr/permutation/permutation_test.go
@@ -0,0 +1,94 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package permutation + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" +) + +func TestProof(t *testing.T) { + + srs, err := kzg.NewSRS(64, big.NewInt(13)) + if err != nil { + t.Fatal(err) + } + + a := make([]fr.Element, 8) + b := make([]fr.Element, 8) + + for i := 0; i < 8; i++ { + a[i].SetUint64(uint64(4*i + 1)) + } + for i := 0; i < 8; i++ { + b[i].Set(&a[(5*i)%8]) + } + + // correct proof + { + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err != nil { + t.Fatal(err) + } + } + + // wrong proof + { + a[0].SetRandom() + proof, err := Prove(srs, a, b) + if err != nil { + t.Fatal(err) + } + + err = Verify(srs, proof) + if err == nil { + t.Fatal(err) + } + } + +} + +func BenchmarkProver(b *testing.B) { + + srsSize := 1 << 15 + polySize := 1 << 14 + + srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13)) + a := make([]fr.Element, polySize) + c := make([]fr.Element, polySize) + + for i := 0; i < polySize; i++ { + a[i].SetUint64(uint64(i)) + } + for i := 0; i < polySize; i++ { + c[i].Set(&a[(5*i)%(polySize)]) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Prove(srs, a, c) + } + +} diff --git a/ecc/bls12-378/fr/plookup/doc.go b/ecc/bls12-378/fr/plookup/doc.go new file mode 100644 index 000000000..ec4b91287 --- /dev/null +++ b/ecc/bls12-378/fr/plookup/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package plookup provides an API to build plookup proofs. +package plookup diff --git a/ecc/bls12-378/fr/plookup/plookup_test.go b/ecc/bls12-378/fr/plookup/plookup_test.go new file mode 100644 index 000000000..d01a7c8a8 --- /dev/null +++ b/ecc/bls12-378/fr/plookup/plookup_test.go @@ -0,0 +1,139 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+// Code generated by consensys/gnark-crypto DO NOT EDIT
+
+package plookup
+
+import (
+	"math/big"
+	"testing"
+
+	"github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg"
+)
+
+// TestLookupVector checks that a lookup proof over a vector whose entries
+// all belong to the lookup table verifies, and that a tampered vector fails.
+func TestLookupVector(t *testing.T) {
+
+	lookupVector := make(Table, 8)
+	fvector := make(Table, 7)
+	for i := 0; i < 8; i++ {
+		lookupVector[i].SetUint64(uint64(2 * i))
+	}
+	for i := 0; i < 7; i++ {
+		fvector[i].Set(&lookupVector[(4*i+1)%8])
+	}
+
+	srs, err := kzg.NewSRS(64, big.NewInt(13))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// correct proof vector
+	{
+		proof, err := ProveLookupVector(srs, fvector, lookupVector)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		err = VerifyLookupVector(srs, proof)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// wrong proofs vector: first entry is (almost surely) not in the table
+	{
+		fvector[0].SetRandom()
+
+		proof, err := ProveLookupVector(srs, fvector, lookupVector)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		err = VerifyLookupVector(srs, proof)
+		if err == nil {
+			// t.Fatal(err) here would only print "<nil>"; use an explicit message
+			t.Fatal("verifying a lookup proof over tampered data should have failed")
+		}
+	}
+
+}
+
+// TestLookupTable is the multi-row variant: each column of fTable must be
+// one of the columns of lookupTable.
+func TestLookupTable(t *testing.T) {
+
+	srs, err := kzg.NewSRS(64, big.NewInt(13))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	lookupTable := make([]Table, 3)
+	fTable := make([]Table, 3)
+	for i := 0; i < 3; i++ {
+		lookupTable[i] = make(Table, 8)
+		fTable[i] = make(Table, 7)
+		for j := 0; j < 8; j++ {
+			lookupTable[i][j].SetUint64(uint64(2*i + j))
+		}
+		for j := 0; j < 7; j++ {
+			fTable[i][j].Set(&lookupTable[i][(4*j+1)%8])
+		}
+	}
+
+	// correct proof
+	{
+		proof, err := ProveLookupTables(srs, fTable, lookupTable)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		err = VerifyLookupTables(srs, proof)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// wrong proof
+	{
+		fTable[0][0].SetRandom()
+		proof, err := ProveLookupTables(srs, fTable, lookupTable)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		err = VerifyLookupTables(srs, proof)
+		if err == nil {
+			// t.Fatal(err) here would only print "<nil>"; use an explicit message
+			t.Fatal("verifying a lookup proof over tampered data should have failed")
+		}
+	}
+
+}
+
+// BenchmarkPlookup measures ProveLookupVector on 2^14-element tables.
+func BenchmarkPlookup(b *testing.B) {
+
+	srsSize := 1 << 15
+	polySize := 1 << 14
+
+	srs, _ := kzg.NewSRS(uint64(srsSize), big.NewInt(13))
+	a := make(Table, polySize)
+	c := make(Table, polySize)
+
+	// iterate up to polySize (the tables' length) rather than repeating the
+	// 1<<14 literal, so the bound cannot drift out of sync with the slices
+	for i := 0; i < polySize; i++ {
+		a[i].SetUint64(uint64(i))
+		c[i].SetUint64(uint64((8 * i) % polySize))
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		ProveLookupVector(srs, a, c)
+	}
+}
diff --git a/ecc/bls12-378/fr/plookup/table.go b/ecc/bls12-378/fr/plookup/table.go
new file mode 100644
index 000000000..51d432b51
--- /dev/null
+++ b/ecc/bls12-378/fr/plookup/table.go
@@ -0,0 +1,252 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "sort" + + bls12378 "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/permutation" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrIncompatibleSize = errors.New("the tables in f and t are not of the same size") + ErrFoldedCommitment = errors.New("the folded commitment is malformed") + ErrNumberDigests = errors.New("proof.ts and proof.fs are not of the same length") +) + +// ProofLookupTables is a proof that a multi-dimensional table f consists of vectors that are in t +type ProofLookupTables struct { + + // commitments to the rows of f + fs []kzg.Digest + + // commitments to the rows of t + ts []kzg.Digest + + // lookup proof for the f and t folded + foldedProof ProofLookupVector + + // proof that the ts folded correspond to t in the folded proof + permutationProof permutation.Proof +} + +// ProveLookupTables generates a proof that f, seen as a multi dimensional table, +// consists of vectors that are in t. In other words for each i, f[:][i] must be one +// of the t[:][j]. +// +// For instance, if t is the truth table of the XOR function, t will be populated such +// that t[:][i] contains the i-th entry of the truth table, so t[0][i] XOR t[1][i] = t[2][i]. +// +// The Tables in f and t are supposed to be of the same constant size.
+func ProveLookupTables(srs *kzg.SRS, f, t []Table) (ProofLookupTables, error) { + + // res + proof := ProofLookupTables{} + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check the sizes + if len(f) != len(t) { + return proof, ErrIncompatibleSize + } + s := len(f[0]) + for i := 1; i < len(f); i++ { + if len(f[i]) != s { + return proof, ErrIncompatibleSize + } + } + s = len(t[0]) + for i := 1; i < len(t); i++ { + if len(t[i]) != s { + return proof, ErrIncompatibleSize + } + } + + // commit to the tables in f and t + nbRows := len(t) + proof.fs = make([]kzg.Digest, nbRows) + proof.ts = make([]kzg.Digest, nbRows) + _nbColumns := len(f[0]) + 1 + if _nbColumns < len(t[0]) { + _nbColumns = len(t[0]) + } + d := fft.NewDomain(uint64(_nbColumns), 0, false) + nbColumns := d.Cardinality + lfs := make([][]fr.Element, nbRows) + cfs := make([][]fr.Element, nbRows) + lts := make([][]fr.Element, nbRows) + cts := make([][]fr.Element, nbRows) + + for i := 0; i < nbRows; i++ { + + cfs[i] = make([]fr.Element, nbColumns) + lfs[i] = make([]fr.Element, nbColumns) + copy(cfs[i], f[i]) + copy(lfs[i], f[i]) + for j := len(f[i]); j < int(nbColumns); j++ { + cfs[i][j] = f[i][len(f[i])-1] + lfs[i][j] = f[i][len(f[i])-1] + } + d.FFTInverse(cfs[i], fft.DIF, 0) + fft.BitReverse(cfs[i]) + proof.fs[i], err = kzg.Commit(cfs[i], srs) + if err != nil { + return proof, err + } + + cts[i] = make([]fr.Element, nbColumns) + lts[i] = make([]fr.Element, nbColumns) + copy(cts[i], t[i]) + copy(lts[i], t[i]) + for j := len(t[i]); j < int(d.Cardinality); j++ { + cts[i][j] = t[i][len(t[i])-1] + lts[i][j] = t[i][len(t[i])-1] + } + d.FFTInverse(cts[i], fft.DIF, 0) + fft.BitReverse(cts[i]) + proof.ts[i], err = kzg.Commit(cts[i], srs) + if err != nil { + return proof, err + } + } + + // fold f and t + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = 
new(kzg.Digest) + comms[i].Set(&proof.fs[i]) + comms[nbRows+i] = new(kzg.Digest) + comms[nbRows+i].Set(&proof.ts[i]) + } + lambda, err := deriveRandomness(&fs, "lambda", comms...) + if err != nil { + return proof, err + } + foldedf := make(Table, nbColumns) + foldedt := make(Table, nbColumns) + for i := 0; i < int(nbColumns); i++ { + for j := nbRows - 1; j >= 0; j-- { + foldedf[i].Mul(&foldedf[i], &lambda). + Add(&foldedf[i], &lfs[j][i]) + foldedt[i].Mul(&foldedt[i], &lambda). + Add(&foldedt[i], <s[j][i]) + } + } + + // generate a proof of permutation of the foldedt and sort(foldedt) + foldedtSorted := make(Table, nbColumns) + copy(foldedtSorted, foldedt) + sort.Sort(foldedtSorted) + proof.permutationProof, err = permutation.Prove(srs, foldedt, foldedtSorted) + if err != nil { + return proof, err + } + + // call plookupVector, on foldedf[:len(foldedf)-1] to ensure that the domain size + // in ProveLookupVector is the same as d's + proof.foldedProof, err = ProveLookupVector(srs, foldedf[:len(foldedf)-1], foldedt) + + return proof, err +} + +// VerifyLookupTables verifies that a ProofLookupTables proof is correct. +func VerifyLookupTables(srs *kzg.SRS, proof ProofLookupTables) error { + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "lambda") + + // check that the number of digests is the same + if len(proof.fs) != len(proof.ts) { + return ErrNumberDigests + } + + // fold the commitments fs and ts + nbRows := len(proof.fs) + comms := make([]*kzg.Digest, 2*nbRows) + for i := 0; i < nbRows; i++ { + comms[i] = &proof.fs[i] + comms[i+nbRows] = &proof.ts[i] + } + lambda, err := deriveRandomness(&fs, "lambda", comms...) 
+ if err != nil { + return err + } + + // fold the commitments of the rows of t and f + var comf, comt kzg.Digest + comf.Set(&proof.fs[nbRows-1]) + comt.Set(&proof.ts[nbRows-1]) + var blambda big.Int + lambda.ToBigIntRegular(&blambda) + for i := nbRows - 2; i >= 0; i-- { + comf.ScalarMultiplication(&comf, &blambda). + Add(&comf, &proof.fs[i]) + comt.ScalarMultiplication(&comt, &blambda). + Add(&comt, &proof.ts[i]) + } + + // check that the folded commitment of the fs correspond to foldedProof.f + if !comf.Equal(&proof.foldedProof.f) { + return ErrFoldedCommitment + } + + // check that the folded commitment of the ts is a permutation of proof.FoldedProof.t + err = permutation.Verify(srs, proof.permutationProof) + if err != nil { + return err + } + + // verify the inner proof + return VerifyLookupVector(srs, proof.foldedProof) +} + +// TODO put that in fiat-shamir package +func deriveRandomness(fs *fiatshamir.Transcript, challenge string, points ...*bls12378.G1Affine) (fr.Element, error) { + + var buf [bls12378.SizeOfG1AffineUncompressed]byte + var r fr.Element + + for _, p := range points { + buf = p.RawBytes() + if err := fs.Bind(challenge, buf[:]); err != nil { + return r, err + } + } + + b, err := fs.ComputeChallenge(challenge) + if err != nil { + return r, err + } + r.SetBytes(b) + return r, nil +} diff --git a/ecc/bls12-378/fr/plookup/vector.go b/ecc/bls12-378/fr/plookup/vector.go new file mode 100644 index 000000000..81b8c536b --- /dev/null +++ b/ecc/bls12-378/fr/plookup/vector.go @@ -0,0 +1,687 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package plookup + +import ( + "crypto/sha256" + "errors" + "math/big" + "math/bits" + "sort" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/fft" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/polynomial" + fiatshamir "github.com/consensys/gnark-crypto/fiat-shamir" +) + +var ( + ErrNotInTable = errors.New("some value in the vector is not in the lookup table") + ErrPlookupVerification = errors.New("plookup verification failed") +) + +type Table []fr.Element + +// Len is the number of elements in the collection. +func (t Table) Len() int { + return len(t) +} + +// Less reports whether the element with +// index i should sort before the element with index j. +func (t Table) Less(i, j int) bool { + return t[i].Cmp(&t[j]) == -1 +} + +// Swap swaps the elements with indexes i and j. +func (t Table) Swap(i, j int) { + t[i], t[j] = t[j], t[i] +} + +// ProofLookupVector is a Plookup proof, containing opening proofs +type ProofLookupVector struct { + + // size of the system + size uint64 + + // Commitments to h1, h2, t, z, f, h + h1, h2, t, z, f, h kzg.Digest + + // Batch opening proof of h1, h2, z, t + BatchedProof kzg.BatchOpeningProof + + // Batch opening proof of h1, h2, z shifted by g + BatchedProofShifted kzg.BatchOpeningProof +} + +// computeZ computes Z, in Lagrange basis.
Z is the accumulation of the partial +// ratios of 2 fully split polynomials (cf https://eprint.iacr.org/2020/315.pdf) +// * lf is the list of values that should be in lt +// * lt is the lookup table +// * lh1, lh2 is lf sorted by lt split in 2 overlapping slices +// * beta, gamma are challenges (Schwartz-zippel: they are the random evaluations point) +func computeZ(lf, lt, lh1, lh2 []fr.Element, beta, gamma fr.Element) []fr.Element { + + z := make([]fr.Element, len(lt)) + + n := len(lt) + d := make([]fr.Element, n-1) + var u, c fr.Element + c.SetOne(). + Add(&c, &beta). + Mul(&c, &gamma) + for i := 0; i < n-1; i++ { + + d[i].Mul(&beta, &lh1[i+1]). + Add(&d[i], &lh1[i]). + Add(&d[i], &c) + + u.Mul(&beta, &lh2[i+1]). + Add(&u, &lh2[i]). + Add(&u, &c) + + d[i].Mul(&d[i], &u) + } + d = fr.BatchInvert(d) + + z[0].SetOne() + var a, b, e fr.Element + e.SetOne().Add(&e, &beta) + for i := 0; i < n-1; i++ { + + a.Add(&gamma, &lf[i]) + + b.Mul(&beta, <[i+1]). + Add(&b, <[i]). + Add(&b, &c) + + a.Mul(&a, &b). + Mul(&a, &e) + + z[i+1].Mul(&z[i], &a). + Mul(&z[i+1], &d[i]) + } + + return z +} + +// computeH computes the evaluation (shifted, bit reversed) of h where +// h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) - +// (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) ) +// +// * cz, ch1, ch2, ct, cf are the polynomials z, h1, h2, t, f in canonical basis +// * _lz, _lh1, _lh2, _lt, _lf are the polynomials z, h1, h2, t, f in shifted Lagrange basis (domainH) +// * beta, gamma are the challenges +// * it returns h in canonical basis +func computeH(_lz, _lh1, _lh2, _lt, _lf []fr.Element, beta, gamma fr.Element, domainH *fft.Domain) []fr.Element { + + // result + s := int(domainH.Cardinality) + num := make([]fr.Element, domainH.Cardinality) + + var u, v, w, _g, m, n, one, t fr.Element + t.SetUint64(2). + Inverse(&t) + _g.Square(&domainH.Generator). 
+ Exp(_g, big.NewInt(int64(s/2-1))) + one.SetOne() + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + g := make([]fr.Element, s) + g[0].Set(&domainH.FinerGenerator) + for i := 1; i < s; i++ { + g[i].Mul(&g[i-1], &domainH.Generator) + } + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + // m = (x-g**(n-1))*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) + m.Mul(&v, &_lz[_i]) + u.Add(&gamma, &_lf[_i]) + m.Mul(&m, &u) + u.Mul(&beta, &_lt[_is]). + Add(&u, &_lt[_i]). + Add(&u, &w) + m.Mul(&m, &u) + + // n = (x-g**(n-1))*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) + n.Mul(&beta, &_lh1[_is]). + Add(&n, &_lh1[_i]). + Add(&n, &w) + u.Mul(&beta, &_lh2[_is]). + Add(&u, &_lh2[_i]). + Add(&u, &w) + n.Mul(&n, &u). + Mul(&n, &_lz[_is]) + + num[_i].Sub(&m, &n) + u.Sub(&g[i], &_g) + num[_i].Mul(&num[_i], &u) + + } + + return num +} + +// computeH0 returns l0 * (z-1), in Lagrange basis and bit reversed order +func computeH0(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var d fr.Element + d.Set(&domainH.FinerGenerator) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(den); i++ { + den[i].Sub(&d, &one) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). 
+ Mul(&res[_i], &g[i%2]).Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHn returns ln * (z-1), in Lagrange basis and bit reversed order +func computeHn(lzCosetReversed []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + one.SetOne() + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(lzCosetReversed)) + for i := 0; i < len(lzCosetReversed); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(lzCosetReversed)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < len(lzCosetReversed); i++ { + _i := int(bits.Reverse64(uint64(i)) >> nn) + res[_i].Sub(&lzCosetReversed[_i], &one). + Mul(&res[_i], &g[i%2]). 
+ Mul(&res[_i], &den[i]) + } + + return res +} + +// computeHh1h2 returns ln * (h1 - h2(g.x)), in Lagrange basis and bit reversed order +func computeHh1h2(_lh1, _lh2 []fr.Element, domainH *fft.Domain) []fr.Element { + + var one fr.Element + one.SetOne() + + var g [2]fr.Element + g[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality/2))) + g[1].Neg(&g[0]) + g[0].Sub(&g[0], &one) + g[1].Sub(&g[1], &one) + + var _g, d fr.Element + d.Set(&domainH.FinerGenerator) + _g.Square(&domainH.Generator).Exp(_g, big.NewInt(int64(domainH.Cardinality/2-1))) + den := make([]fr.Element, len(_lh1)) + for i := 0; i < len(_lh1); i++ { + den[i].Sub(&d, &_g) + d.Mul(&d, &domainH.Generator) + } + den = fr.BatchInvert(den) + + res := make([]fr.Element, len(_lh1)) + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + s := len(_lh1) + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + _is := int(bits.Reverse64(uint64((i+2)%s)) >> nn) + + res[_i].Sub(&_lh1[_i], &_lh2[_is]). + Mul(&res[_i], &g[i%2]). + Mul(&res[_i], &den[i]) + } + + return res +} + +// computeQuotient computes the full quotient of the plookup protocol. +// * alpha is the challenge to fold the numerator +// * lh, lh0, lhn, lh1h2 are the various pieces of the numerator (Lagrange shifted form, bit reversed order) +// * domainH fft domain +// It returns the quotient, in canonical basis +func computeQuotient(alpha fr.Element, lh, lh0, lhn, lh1h2 []fr.Element, domainH *fft.Domain) []fr.Element { + + s := len(lh) + res := make([]fr.Element, s) + + var one fr.Element + one.SetOne() + + var d [2]fr.Element + d[0].Exp(domainH.FinerGenerator, big.NewInt(int64(domainH.Cardinality>>1))) + d[1].Neg(&d[0]) + d[0].Sub(&d[0], &one).Inverse(&d[0]) + d[1].Sub(&d[1], &one).Inverse(&d[1]) + + nn := uint64(64 - bits.TrailingZeros64(domainH.Cardinality)) + + for i := 0; i < s; i++ { + + _i := int(bits.Reverse64(uint64(i)) >> nn) + + res[_i].Mul(&lh1h2[_i], &alpha). + Add(&res[_i], &lhn[_i]). 
+ Mul(&res[_i], &alpha). + Add(&res[_i], &lh0[_i]). + Mul(&res[_i], &alpha). + Add(&res[_i], &lh[_i]). + Mul(&res[_i], &d[i%2]) + } + + domainH.FFTInverse(res, fft.DIT, 1) + + return res +} + +// ProveLookupVector returns proof that the values in f are in t. +// +// /!\IMPORTANT/!\ +// +// If the table t is already commited somewhere (which is the normal workflow +// before generating a lookup proof), the commitment needs to be done on the +// table sorted. Otherwise the commitment in proof.t will not be the same as +// the public commitment: it will contain the same values, but permuted. +// +func ProveLookupVector(srs *kzg.SRS, f, t Table) (ProofLookupVector, error) { + + // res + var proof ProofLookupVector + var err error + + // hash function used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // create domains + var dNum *fft.Domain + if len(t) <= len(f) { + dNum = fft.NewDomain(uint64(len(f)+1), 0, false) + } else { + dNum = fft.NewDomain(uint64(len(t)), 0, false) + } + cardDNum := int(dNum.Cardinality) + + // set the size + proof.size = dNum.Cardinality + + // resize f and t + // note: the last element of lf does not matter + lf := make([]fr.Element, cardDNum) + lt := make([]fr.Element, cardDNum) + cf := make([]fr.Element, cardDNum) + ct := make([]fr.Element, cardDNum) + copy(lt, t) + copy(lf, f) + for i := len(f); i < cardDNum; i++ { + lf[i] = f[len(f)-1] + } + for i := len(t); i < cardDNum; i++ { + lt[i] = t[len(t)-1] + } + sort.Sort(Table(lt)) + copy(ct, lt) + copy(cf, lf) + dNum.FFTInverse(ct, fft.DIF, 0) + dNum.FFTInverse(cf, fft.DIF, 0) + fft.BitReverse(ct) + fft.BitReverse(cf) + proof.t, err = kzg.Commit(ct, srs) + if err != nil { + return proof, err + } + proof.f, err = kzg.Commit(cf, srs) + if err != nil { + return proof, err + } + + // write f sorted by t + lfSortedByt := make(Table, 2*dNum.Cardinality-1) + copy(lfSortedByt, lt) + 
copy(lfSortedByt[dNum.Cardinality:], lf) + sort.Sort(lfSortedByt) + + // compute h1, h2, commit to them + lh1 := make([]fr.Element, cardDNum) + lh2 := make([]fr.Element, cardDNum) + ch1 := make([]fr.Element, cardDNum) + ch2 := make([]fr.Element, cardDNum) + copy(lh1, lfSortedByt[:cardDNum]) + copy(lh2, lfSortedByt[cardDNum-1:]) + + copy(ch1, lfSortedByt[:cardDNum]) + copy(ch2, lfSortedByt[cardDNum-1:]) + dNum.FFTInverse(ch1, fft.DIF, 0) + dNum.FFTInverse(ch2, fft.DIF, 0) + fft.BitReverse(ch1) + fft.BitReverse(ch2) + + proof.h1, err = kzg.Commit(ch1, srs) + if err != nil { + return proof, err + } + proof.h2, err = kzg.Commit(ch2, srs) + if err != nil { + return proof, err + } + + // derive beta, gamma + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return proof, err + } + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return proof, err + } + + // Compute to Z + lz := computeZ(lf, lt, lh1, lh2, beta, gamma) + cz := make([]fr.Element, len(lz)) + copy(cz, lz) + dNum.FFTInverse(cz, fft.DIF, 0) + fft.BitReverse(cz) + proof.z, err = kzg.Commit(cz, srs) + if err != nil { + return proof, err + } + + // prepare data for computing the quotient + // compute the numerator + s := dNum.Cardinality + domainH := fft.NewDomain(uint64(2*s), 1, false) + _lz := make([]fr.Element, 2*s) + _lh1 := make([]fr.Element, 2*s) + _lh2 := make([]fr.Element, 2*s) + _lt := make([]fr.Element, 2*s) + _lf := make([]fr.Element, 2*s) + copy(_lz, cz) + copy(_lh1, ch1) + copy(_lh2, ch2) + copy(_lt, ct) + copy(_lf, cf) + domainH.FFT(_lz, fft.DIF, 1) + domainH.FFT(_lh1, fft.DIF, 1) + domainH.FFT(_lh2, fft.DIF, 1) + domainH.FFT(_lt, fft.DIF, 1) + domainH.FFT(_lf, fft.DIF, 1) + + // compute h + lh := computeH(_lz, _lh1, _lh2, _lt, _lf, beta, gamma, domainH) + + // compute h0 + lh0 := computeH0(_lz, domainH) + + // compute hn + lhn := computeHn(_lz, domainH) + + // compute hh1h2 + lh1h2 := computeHh1h2(_lh1, _lh2, domainH) + + // 
compute the quotient + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return proof, err + } + ch := computeQuotient(alpha, lh, lh0, lhn, lh1h2, domainH) + proof.h, err = kzg.Commit(ch, srs) + if err != nil { + return proof, err + } + + // build the opening proofs + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return proof, err + } + proof.BatchedProof, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + cf, + ch, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + nu.Mul(&nu, &dNum.Generator) + proof.BatchedProofShifted, err = kzg.BatchOpenSinglePoint( + []polynomial.Polynomial{ + ch1, + ch2, + ct, + cz, + }, + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &nu, + hFunc, + dNum, + srs, + ) + if err != nil { + return proof, err + } + + return proof, nil +} + +// VerifyLookupVector verifies that a ProofLookupVector proof is correct +func VerifyLookupVector(srs *kzg.SRS, proof ProofLookupVector) error { + + // hash function that is used for Fiat Shamir + hFunc := sha256.New() + + // transcript to derive the challenge + fs := fiatshamir.NewTranscript(hFunc, "beta", "gamma", "alpha", "nu") + + // derive the various challenges + beta, err := deriveRandomness(&fs, "beta", &proof.t, &proof.f, &proof.h1, &proof.h2) + if err != nil { + return err + } + + gamma, err := deriveRandomness(&fs, "gamma") + if err != nil { + return err + } + + alpha, err := deriveRandomness(&fs, "alpha", &proof.z) + if err != nil { + return err + } + + nu, err := deriveRandomness(&fs, "nu", &proof.h) + if err != nil { + return err + } + + // check opening proofs + err = kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + proof.f, + proof.h, + }, + &proof.BatchedProof, + hFunc, + srs, + ) + if err != nil { + return err + } + + err = 
kzg.BatchVerifySinglePoint( + []kzg.Digest{ + proof.h1, + proof.h2, + proof.t, + proof.z, + }, + &proof.BatchedProofShifted, + hFunc, + srs, + ) + if err != nil { + return err + } + + // check polynomial relation using Schwartz Zippel + var lhs, rhs, nun, g, _g, a, v, w, one fr.Element + d := fft.NewDomain(proof.size, 0, false) // only there to access to root of 1... + one.SetOne() + g.Exp(d.Generator, big.NewInt(int64(d.Cardinality-1))) + + v.Add(&one, &beta) + w.Mul(&v, &gamma) + + // h(nu) where + // h = (x-1)*z*(1+beta)*(gamma+f)*(gamma(1+beta) + t+ beta*t(gX)) - + // (x-1)*z(gX)*(gamma(1+beta) + h1 + beta*h1(gX))*(gamma(1+beta) + h2 + beta*h2(gX) ) + lhs.Sub(&nu, &g). + Mul(&lhs, &proof.BatchedProof.ClaimedValues[3]). + Mul(&lhs, &v) + a.Add(&gamma, &proof.BatchedProof.ClaimedValues[4]) + lhs.Mul(&lhs, &a) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[2]). + Add(&a, &proof.BatchedProof.ClaimedValues[2]). + Add(&a, &w) + lhs.Mul(&lhs, &a) + + rhs.Sub(&nu, &g). + Mul(&rhs, &proof.BatchedProofShifted.ClaimedValues[3]) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[0]). + Add(&a, &proof.BatchedProof.ClaimedValues[0]). + Add(&a, &w) + rhs.Mul(&rhs, &a) + a.Mul(&beta, &proof.BatchedProofShifted.ClaimedValues[1]). + Add(&a, &proof.BatchedProof.ClaimedValues[1]). + Add(&a, &w) + rhs.Mul(&rhs, &a) + + lhs.Sub(&lhs, &rhs) + + // check consistancy of bounds + var l0, ln, d1, d2 fr.Element + l0.Exp(nu, big.NewInt(int64(d.Cardinality))).Sub(&l0, &one) + ln.Set(&l0) + d1.Sub(&nu, &one) + d2.Sub(&nu, &g) + l0.Div(&l0, &d1) + ln.Div(&ln, &d2) + + // l0*(z-1) + var l0z fr.Element + l0z.Sub(&proof.BatchedProof.ClaimedValues[3], &one). + Mul(&l0z, &l0) + + // ln*(z-1) + var lnz fr.Element + lnz.Sub(&proof.BatchedProof.ClaimedValues[3], &one). + Mul(&ln, &lnz) + + // ln*(h1 - h2(g.x)) + var lnh1h2 fr.Element + lnh1h2.Sub(&proof.BatchedProof.ClaimedValues[0], &proof.BatchedProofShifted.ClaimedValues[1]). 
+ Mul(&lnh1h2, &ln) + + // fold the numerator + lnh1h2.Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lnz). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &l0z). + Mul(&lnh1h2, &alpha). + Add(&lnh1h2, &lhs) + + // (x**n-1) * h(x) evaluated at nu + nun.Exp(nu, big.NewInt(int64(d.Cardinality))) + _g.Sub(&nun, &one) + _g.Mul(&proof.BatchedProof.ClaimedValues[5], &_g) + if !lnh1h2.Equal(&_g) { + return ErrPlookupVerification + } + + return nil +} diff --git a/ecc/bls12-378/fr/polynomial/doc.go b/ecc/bls12-378/fr/polynomial/doc.go new file mode 100644 index 000000000..83479b058 --- /dev/null +++ b/ecc/bls12-378/fr/polynomial/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package polynomial provides polynomial methods and commitment schemes. +package polynomial diff --git a/ecc/bls12-378/fr/polynomial/polynomial.go b/ecc/bls12-378/fr/polynomial/polynomial.go new file mode 100644 index 000000000..27b5e17d1 --- /dev/null +++ b/ecc/bls12-378/fr/polynomial/polynomial.go @@ -0,0 +1,123 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// Polynomial is a polynomial represented by its coefficients in the bls12-378 fr field. +type Polynomial []fr.Element + +// Degree returns the degree of the polynomial, which is its number of coefficients minus one. +func (p *Polynomial) Degree() uint64 { + return uint64(len(*p) - 1) +} + +// Eval evaluates p at v +// returns a fr.Element +func (p *Polynomial) Eval(v *fr.Element) fr.Element { + + res := (*p)[len(*p)-1] + for i := len(*p) - 2; i >= 0; i-- { + res.Mul(&res, v) + res.Add(&res, &(*p)[i]) + } + + return res +} + +// Clone returns a copy of the polynomial +func (p *Polynomial) Clone() Polynomial { + _p := make(Polynomial, len(*p)) + copy(_p, *p) + return _p +} + +// AddConstantInPlace adds a constant to the polynomial, modifying p +func (p *Polynomial) AddConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Add(&(*p)[i], c) + } +} + +// SubConstantInPlace subtracts a constant from the polynomial, modifying p +func (p *Polynomial) SubConstantInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Sub(&(*p)[i], c) + } +} + +// ScaleInPlace multiplies p by c, modifying p +func (p *Polynomial) ScaleInPlace(c *fr.Element) { + for i := 0; i < len(*p); i++ { + (*p)[i].Mul(&(*p)[i], c) + } +} + +// Add adds p1 to p2 +// This function allocates a new slice unless p == p1 or p == p2 +func (p *Polynomial) Add(p1, p2 Polynomial) *Polynomial { + + bigger := p1 + smaller := p2 + if len(bigger) < len(smaller)
{ + bigger, smaller = smaller, bigger + } + + if len(*p) == len(bigger) && (&(*p)[0] == &bigger[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &smaller[i]) + } + return p + } + + if len(*p) == len(smaller) && (&(*p)[0] == &smaller[0]) { + for i := 0; i < len(smaller); i++ { + (*p)[i].Add(&(*p)[i], &bigger[i]) + } + *p = append(*p, bigger[len(smaller):]...) + return p + } + + res := make(Polynomial, len(bigger)) + copy(res, bigger) + for i := 0; i < len(smaller); i++ { + res[i].Add(&res[i], &smaller[i]) + } + *p = res + return p +} + +// Equal checks equality between two polynomials +func (p *Polynomial) Equal(p1 Polynomial) bool { + if (*p == nil) != (p1 == nil) { + return false + } + + if len(*p) != len(p1) { + return false + } + + for i := range p1 { + if !(*p)[i].Equal(&p1[i]) { + return false + } + } + + return true +} diff --git a/ecc/bls12-378/fr/polynomial/polynomial_test.go b/ecc/bls12-378/fr/polynomial/polynomial_test.go new file mode 100644 index 000000000..73994acd5 --- /dev/null +++ b/ecc/bls12-378/fr/polynomial/polynomial_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package polynomial + +import ( + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +func TestPolynomialEval(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // random value + var point fr.Element + point.SetRandom() + + // compute manually f(val) + var expectedEval, one, den fr.Element + var expo big.Int + one.SetOne() + expo.SetUint64(20) + expectedEval.Exp(point, &expo). + Sub(&expectedEval, &one) + den.Sub(&point, &one) + expectedEval.Div(&expectedEval, &den) + + // compute purported evaluation + purportedEval := f.Eval(&point) + + // check + if !purportedEval.Equal(&expectedEval) { + t.Fatal("polynomial evaluation failed") + } +} + +func TestPolynomialAddConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to add + var c fr.Element + c.SetRandom() + + // add constant + f.AddConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Add(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("AddConstantInPlace failed") + } + } +} + +func TestPolynomialSubConstantInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to sub + var c fr.Element + c.SetRandom() + + // sub constant + f.SubConstantInPlace(&c) + + // check + var expectedCoeffs, one fr.Element + one.SetOne() + expectedCoeffs.Sub(&one, &c) + for i := 0; i < 20; i++ { + if !f[i].Equal(&expectedCoeffs) { + t.Fatal("SubConstantInPlace failed") + } + } +} + +func TestPolynomialScaleInPlace(t *testing.T) { + + // build polynomial + f := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f[i].SetOne() + } + + // constant to scale by + var c fr.Element + c.SetRandom() + + // scale by constant + 
f.ScaleInPlace(&c) + + // check + for i := 0; i < 20; i++ { + if !f[i].Equal(&c) { + t.Fatal("ScaleInPlace failed") + } + } + +} + +func TestPolynomialAdd(t *testing.T) { + + // build unbalanced polynomials + f1 := make(Polynomial, 20) + f1Backup := make(Polynomial, 20) + for i := 0; i < 20; i++ { + f1[i].SetOne() + f1Backup[i].SetOne() + } + f2 := make(Polynomial, 10) + f2Backup := make(Polynomial, 10) + for i := 0; i < 10; i++ { + f2[i].SetOne() + f2Backup[i].SetOne() + } + + // expected result + var one, two fr.Element + one.SetOne() + two.Double(&one) + expectedSum := make(Polynomial, 20) + for i := 0; i < 10; i++ { + expectedSum[i].Set(&two) + } + for i := 10; i < 20; i++ { + expectedSum[i].Set(&one) + } + + // caller is empty + var g Polynomial + g.Add(f1, f2) + if !g.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // all operands are distincts + _f1 := f1.Clone() + _f1.Add(f1, f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !f1.Equal(f1Backup) { + t.Fatal("side effect, f1 should not have been modified") + } + if !f2.Equal(f2Backup) { + t.Fatal("side effect, f2 should not have been modified") + } + + // first operand = caller + _f1 = f1.Clone() + _f2 := f2.Clone() + _f1.Add(_f1, _f2) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } + + // second operand = caller + _f1 = f1.Clone() + _f2 = f2.Clone() + _f1.Add(_f2, _f1) + if !_f1.Equal(expectedSum) { + t.Fatal("add polynomials fails") + } + if !_f2.Equal(f2Backup) { + t.Fatal("side effect, _f2 should not have been modified") + } +} diff --git a/ecc/bls12-378/fuzz.go b/ecc/bls12-378/fuzz.go new file mode 100644 index 000000000..ad1184831 --- /dev/null +++ 
b/ecc/bls12-378/fuzz.go @@ -0,0 +1,76 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "bytes" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/mimc" + "math/big" +) + +const ( + fuzzInteresting = 1 + fuzzNormal = 0 + fuzzDiscard = -1 +) + +func Fuzz(data []byte) int { + // TODO separate in multiple FuzzXXX and update continuous fuzzer scripts + // else, we don't really benefits for fuzzer strategy. 
+ fr.Fuzz(data) + fp.Fuzz(data) + mimc.Fuzz(data) + + // fuzz pairing + r := bytes.NewReader(data) + var e1, e2 fr.Element + e1.SetRawBytes(r) + e2.SetRawBytes(r) + + { + var r, r1, r2, r1r2, zero GT + var b1, b2, b1b2 big.Int + e1.ToBigIntRegular(&b1) + e2.ToBigIntRegular(&b2) + b1b2.Mul(&b1, &b2) + + var p1 G1Affine + var p2 G2Affine + + p1.ScalarMultiplication(&g1GenAff, &b1) + p2.ScalarMultiplication(&g2GenAff, &b2) + + r, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + r1, _ = Pair([]G1Affine{p1}, []G2Affine{g2GenAff}) + r2, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{p2}) + + r1r2.Exp(&r, b1b2) + r1.Exp(&r1, b2) + r2.Exp(&r2, b1) + + if !(r1r2.Equal(&r1) && r1r2.Equal(&r2) && !r.Equal(&zero)) { + panic("pairing bilinearity check failed") + } + } + + return fuzzNormal +} diff --git a/ecc/bls12-378/fuzz_test.go b/ecc/bls12-378/fuzz_test.go new file mode 100644 index 000000000..128cc1196 --- /dev/null +++ b/ecc/bls12-378/fuzz_test.go @@ -0,0 +1,56 @@ +//go:build gofuzz +// +build gofuzz + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "encoding/hex" + "io" + "math/rand" + "runtime/debug" + "testing" + "time" +) + +func TestFuzz(t *testing.T) { + const maxBytes = 1 << 10 + const testCount = 7 + var bytes [maxBytes]byte + var i int + seed := time.Now().UnixNano() + defer func() { + if r := recover(); r != nil { + t.Error(r) + t.Error(string(debug.Stack())) + t.Fatal("test panicked", i, hex.EncodeToString(bytes[:i]), "seed", seed) + } + }() + r := rand.New(rand.NewSource(seed)) + + for i = 1; i < maxBytes; i++ { + for j := 0; j < testCount; j++ { + if _, err := io.ReadFull(r, bytes[:i]); err != nil { + t.Fatal("couldn't read random bytes", err) + } + + Fuzz(bytes[:i]) + } + } + +} diff --git a/ecc/bls12-378/g1.go b/ecc/bls12-378/g1.go new file mode 100644 index 000000000..ed417dd6b --- /dev/null +++ b/ecc/bls12-378/g1.go @@ -0,0 +1,964 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G1Affine point in affine coordinates +type G1Affine struct { + X, Y fp.Element +} + +// G1Jac is a point with fp.Element coordinates +type G1Jac struct { + X, Y, Z fp.Element +} + +// g1JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g1JacExtended struct { + X, Y, ZZ, ZZZ fp.Element +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G1Affine) Set(a *G1Affine) *G1Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G1Affine) ScalarMultiplication(a *G1Affine, s *big.Int) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Add(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G1Affine) Sub(a, b *G1Affine) *G1Affine { + var p1, p2 G1Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G1Affine) Equal(a *G1Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G1Affine) Neg(a *G1Affine) *G1Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G1Affine) FromJacobian(p1 *G1Jac) *G1Affine { + + var a, b fp.Element + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G1Affine) String() string { + var x, y fp.Element + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G1Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G1Affine) IsOnCurve() bool { + var point G1Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G1Affine) IsInSubGroup() bool { + var _p G1Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G1Jac) Set(a *G1Jac) *G1Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G1Jac) Equal(a *G1Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G1Affine{} + _p.FromJacobian(p) + + _a := G1Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G1Jac) Neg(a *G1Jac) *G1Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G1Jac) SubAssign(a *G1Jac) *G1Jac { + var tmp G1Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G1Jac) AddAssign(a *G1Jac) *G1Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fp.Element + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G1Jac) AddMixed(a *G1Affine) *G1Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fp.Element + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) Double(q *G1Jac) *G1Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G1Jac) DoubleAssign() *G1Jac { + + var XX, YY, YYYY, ZZ, S, M, T fp.Element + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) ScalarMultiplication(a *G1Jac, s *big.Int) *G1Jac { + return p.mulGLV(a, s) +} + +func (p *G1Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G1Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G1Jac) FromAffine(Q *G1Affine) *G1Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G1Jac) IsOnCurve() bool { + var left, right, tmp fp.Element + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// IsInSubGroup returns true if p is on the r-torsion, false otherwise. +// Z[r,0]+Z[-lambdaG1Affine, 1] is the kernel +// of (u,v)->u+lambdaG1Affinev mod r. Expressing r, lambdaG1Affine as +// polynomials in x, a short vector of this Zmodule is +// 1, x**2. So we check that p+x**2*phi(p) +// is the infinity. +func (p *G1Jac) IsInSubGroup() bool { + + var res G1Jac + res.phi(p). + ScalarMultiplication(&res, &xGen). + ScalarMultiplication(&res, &xGen). 
+ AddAssign(p) + + return res.IsOnCurve() && res.Z.IsZero() + +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G1Jac) mulWindowed(a *G1Jac, s *big.Int) *G1Jac { + + var res G1Jac + var ops [3]G1Jac + + res.Set(&g1Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G1Jac) phi(a *G1Jac) *G1Jac { + p.Set(a) + p.X.Mul(&p.X, &thirdRootOneG1) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G1Jac) mulGLV(a *G1Jac, s *big.Int) *G1Jac { + + var table [15]G1Jac + var res G1Jac + var k1, k2 fr.Element + + res.Set(&g1Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + 
table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G1Affine) ClearCofactor(a *G1Affine) *G1Affine { + var _p G1Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in E(Fp) to E(Fp)[r] +func (p *G1Jac) ClearCofactor(a *G1Jac) *G1Jac { + // cf https://eprint.iacr.org/2019/403.pdf, 5 + var res G1Jac + res.ScalarMultiplication(a, &xGen).Neg(&res).AddAssign(a) + p.Set(&res) + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g1JacExtended) Set(a *g1JacExtended) *g1JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g1JacExtended) setInfinity() *g1JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G1Affine) fromJacExtended(Q *g1JacExtended) *G1Affine { + if Q.ZZ.IsZero() { + p.X = fp.Element{} + p.Y = fp.Element{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G1Jac) fromJacExtended(Q *g1JacExtended) *G1Jac { + if Q.ZZ.IsZero() { + p.Set(&g1Infinity) + return p + } + p.X.Mul(&Q.ZZ, 
&Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G1Jac) unsafeFromJacExtended(Q *g1JacExtended) *G1Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g1JacExtended) add(q *g1JacExtended) *g1JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fp.Element + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if B.IsZero() { + return p.double(q) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var U1, U2, S1, S2, P, R, PP, PPP, Q, V fp.Element + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) double(q *g1JacExtended) *g1JacExtended { + var U, V, W, S, XX, M fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) subMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g1JacExtended) addMixed(a *G1Affine) *g1JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fp.Element + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fp.Element{} + p.ZZZ = fp.Element{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fp.Element + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p 
+ +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g1JacExtended) doubleNegMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g1JacExtended) doubleMixed(q *G1Affine) *g1JacExtended { + + var U, V, W, S, XX, M, S2, L fp.Element + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// BatchJacobianToAffineG1 converts points in Jacobian coordinates to Affine coordinates +// performing a single field inversion (Montgomery batch inversion trick) +// result must be allocated with len(result) == len(points) +func BatchJacobianToAffineG1(points []G1Jac, result []G1Affine) { + zeroes := make([]bool, len(points)) + accumulator := fp.One() + + // batch invert all points[].Z coordinates with Montgomery batch inversion trick + // (stores points[].Z^-1 in result[i].X to avoid allocating a slice of fr.Elements) + for i := 0; i < len(points); i++ { + if points[i].Z.IsZero() { + zeroes[i] = true + continue + } + result[i].X = accumulator + accumulator.Mul(&accumulator, &points[i].Z) + } + + var accInverse fp.Element + accInverse.Inverse(&accumulator) + + for i := len(points) - 1; i >= 0; i-- { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. 
+ continue + } + result[i].X.Mul(&result[i].X, &accInverse) + accInverse.Mul(&accInverse, &points[i].Z) + } + + // batch convert to affine. + parallel.Execute(len(points), func(start, end int) { + for i := start; i < end; i++ { + if zeroes[i] { + // do nothing, X and Y are zeroes in affine. + continue + } + var a, b fp.Element + a = result[i].X + b.Square(&a) + result[i].X.Mul(&points[i].X, &b) + result[i].Y.Mul(&points[i].Y, &b). + Mul(&result[i].Y, &a) + } + }) + +} + +// BatchScalarMultiplicationG1 multiplies the same base (generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG1(base *G1Affine, scalars []fr.Element) []G1Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G1Jac, (1 << (c - 1))) + baseTable[0].Set(&g1Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; 
chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + // convert our base exp table into affine to use AddMixed + baseTableAff := make([]G1Affine, (1 << (c - 1))) + BatchJacobianToAffineG1(baseTable, baseTableAff) + toReturn := make([]G1Jac, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. + parallel.Execute(len(pScalars), func(start, end int) { + var p G1Jac + for i := start; i < end; i++ { + p.Set(&g1Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddMixed(&baseTableAff[bits-1]) + } else { + // sub + t := baseTableAff[bits & ^msbWindow] + t.Neg(&t) + p.AddMixed(&t) + } + } + + // set our result point + toReturn[i] = p + + } + }) + toReturnAff := make([]G1Affine, len(scalars)) + BatchJacobianToAffineG1(toReturn, toReturnAff) + return toReturnAff +} diff --git a/ecc/bls12-378/g1_test.go b/ecc/bls12-378/g1_test.go new file mode 100644 index 000000000..13346156b --- /dev/null +++ b/ecc/bls12-378/g1_test.go @@ -0,0 +1,666 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG1AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fp.Element) bool { + var p, res1, res2 G1Jac + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fp.Element) bool { + var p, res, tmp G1Jac + g := MapToCurveG1Svdw(a) + p.FromAffine(&g) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Svsw mapping should output point on the curve", prop.ForAll( + func(a fp.Element) bool { + g := MapToCurveG1Svdw(a) + return g.IsInSubGroup() + }, + GenFp(), + )) + + properties.Property("[G1] Svsw mapping should be deterministic", prop.ForAll( + func(a fp.Element) bool { + g1 := MapToCurveG1Svdw(a) + g2 := MapToCurveG1Svdw(a) + return g1.Equal(&g2) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] g1Gen (affine) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G1Affine + op1.FromJacobian(&g1Gen) + op2.FromJacobian(&g1Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenFp(), + )) + + properties.Property("[BLS12-378] g1Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fp.Element) bool { + var op1, op2, op3 G1Jac + op1.Set(&g1Gen) + op3.Set(&g1Gen) + + op2 = fuzzJacobianG1Affine(&g1Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + g := 
fuzzJacobianG1Affine(&g1Gen, a) + var op1 G1Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fp.Element) bool { + var g g1JacExtended + g.X.Set(&g1Gen.X) + g.Y.Set(&g1Gen.Y) + g.ZZ.Set(&g1Gen.Z) + g.ZZZ.Set(&g1Gen.Z) + gfuzz := fuzzExtendedJacobianG1Affine(&g, a) + + var op1 G1Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g1Gen.X) && op1.Y.Equal(&g1Gen.Y) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fp.Element) bool { + var g G1Jac + var op1 G1Affine + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + + var one fp.Element + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g1Gen.X) && g.Y.Equal(&g1Gen.Y) && g.Z.Equal(&one) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G1Jac + op1.FromAffine(&g) + var one, zero fp.Element + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G1Affine + var op1 g1JacExtended + var zero fp.Element + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G1Jac + var op1 g1JacExtended + var zero, one fp.Element + one.SetOne() + op1.X.Set(&g1Gen.X) + op1.Y.Set(&g1Gen.Y) + g.fromJacExtended(&op1) 
+ return g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fp.Element) bool { + op1 := fuzzJacobianG1Affine(&g1Gen, a) + op2 := fuzzJacobianG1Affine(&g1Gen, b) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BLS12-378] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + var op1, op2 G1Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop2 := fuzzJacobianG1Affine(&g1Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.AddAssign(&g1Infinity) + var op2 G1Jac + op2.Set(&g1Infinity) + op2.AddAssign(&g1Gen) + return fop1.Equal(&g1Gen) && op2.Equal(&g1Gen) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + 
o1.addMixed(&p1Neg) + o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + var p1, p1Neg G1Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g1JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fp.Element) bool { + fop1 := fuzzJacobianG1Affine(&g1Gen, a) + fop1.Neg(&fop1) + var op2 G1Affine + op2.FromJacobian(&g1Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g1Infinity) + }, + GenFp(), + )) + + properties.Property("[BLS12-378] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g1Gen, &rminusone) + gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g1Gen, &scalar) + op2.mulWindowed(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BLS12-378] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G1Jac + g.mulGLV(&g1Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G1Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + 
op3.ScalarMultiplication(&g1Gen, &rminusone) + gneg.Neg(&g1Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g1Gen, &scalar) + op2.ScalarMultiplication(&g1Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g1Infinity) && !op1.Equal(&g1Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BLS12-378] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G1Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g1Gen, &r) + op2.mulGLV(&g1Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g1Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG1AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fp.Element + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + for x.Legendre() != 1 { + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bCurveCoeff) + + } + + b.Sqrt(&x) + var point, pointCleared, infinity G1Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g1Infinity) + return point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG1AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BLS12-378] BatchScalarMultiplication should be consistant with 
individual scalar multiplications", prop.ForAll( + func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G1Jac + var expected G1Affine + var b big.Int + expectedJac.mulGLV(&g1Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG1JacIsInSubGroup(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG1AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG1(&g1GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG1JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G1Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g1Gen, &scalar) + } + }) + + var glv G1Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g1Gen, &scalar) + } + }) + +} + +func BenchmarkG1AffineCofactorClearing(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG1JacAdd(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g1Gen) + } +} + +func BenchmarkG1JacAddMixed(b *testing.B) { + var a G1Jac + a.Double(&g1Gen) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG1JacDouble(b *testing.B) { + var a G1Jac + a.Set(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG1JacExtAddMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG1JacExtSubMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + 
c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG1JacExtDoubleNegMixed(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + var c G1Affine + c.FromJacobian(&g1Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG1JacExtAdd(b *testing.B) { + var a, c g1JacExtended + a.doubleMixed(&g1GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG1JacExtDouble(b *testing.B) { + var a g1JacExtended + a.doubleMixed(&g1GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG1Affine(p *G1Jac, f fp.Element) G1Jac { + var res G1Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG1Affine(p *g1JacExtended, f fp.Element) g1JacExtended { + var res g1JacExtended + var ff, fff fp.Element + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bls12-378/g2.go b/ecc/bls12-378/g2.go new file mode 100644 index 000000000..10bbd197f --- /dev/null +++ b/ecc/bls12-378/g2.go @@ -0,0 +1,978 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "math" + "math/big" + "runtime" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// G2Affine point in affine coordinates +type G2Affine struct { + X, Y fptower.E2 +} + +// G2Jac is a point with fptower.E2 coordinates +type G2Jac struct { + X, Y, Z fptower.E2 +} + +// g2JacExtended parameterized jacobian coordinates (x=X/ZZ, y=Y/ZZZ, ZZ**3=ZZZ**2) +type g2JacExtended struct { + X, Y, ZZ, ZZZ fptower.E2 +} + +// g2Proj point in projective coordinates +type g2Proj struct { + x, y, z fptower.E2 +} + +// ------------------------------------------------------------------------------------------------- +// Affine + +// Set sets p to the provided point +func (p *G2Affine) Set(a *G2Affine) *G2Affine { + p.X, p.Y = a.X, a.Y + return p +} + +// ScalarMultiplication computes and returns p = a*s +func (p *G2Affine) ScalarMultiplication(a *G2Affine, s *big.Int) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.mulGLV(&_p, s) + p.FromJacobian(&_p) + return p +} + +// Add adds two point in affine coordinates. +// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Add(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.AddAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Sub subs two point in affine coordinates. 
+// This should rarely be used as it is very inneficient compared to Jacobian +// TODO implement affine addition formula +func (p *G2Affine) Sub(a, b *G2Affine) *G2Affine { + var p1, p2 G2Jac + p1.FromAffine(a) + p2.FromAffine(b) + p1.SubAssign(&p2) + p.FromJacobian(&p1) + return p +} + +// Equal tests if two points (in Affine coordinates) are equal +func (p *G2Affine) Equal(a *G2Affine) bool { + return p.X.Equal(&a.X) && p.Y.Equal(&a.Y) +} + +// Neg computes -G +func (p *G2Affine) Neg(a *G2Affine) *G2Affine { + p.X = a.X + p.Y.Neg(&a.Y) + return p +} + +// FromJacobian rescale a point in Jacobian coord in z=1 plane +func (p *G2Affine) FromJacobian(p1 *G2Jac) *G2Affine { + + var a, b fptower.E2 + + if p1.Z.IsZero() { + p.X.SetZero() + p.Y.SetZero() + return p + } + + a.Inverse(&p1.Z) + b.Square(&a) + p.X.Mul(&p1.X, &b) + p.Y.Mul(&p1.Y, &b).Mul(&p.Y, &a) + + return p +} + +func (p *G2Affine) String() string { + var x, y fptower.E2 + x.Set(&p.X) + y.Set(&p.Y) + return "E([" + x.String() + "," + y.String() + "])," +} + +// IsInfinity checks if the point is infinity (in affine, it's encoded as (0,0)) +func (p *G2Affine) IsInfinity() bool { + return p.X.IsZero() && p.Y.IsZero() +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Affine) IsOnCurve() bool { + var point G2Jac + point.FromAffine(p) + return point.IsOnCurve() // call this function to handle infinity point +} + +// IsInSubGroup returns true if p is in the correct subgroup, false otherwise +func (p *G2Affine) IsInSubGroup() bool { + var _p G2Jac + _p.FromAffine(p) + return _p.IsInSubGroup() +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian + +// Set sets p to the provided point +func (p *G2Jac) Set(a *G2Jac) *G2Jac { + p.X, p.Y, p.Z = a.X, a.Y, a.Z + return p +} + +// Equal tests if two points (in Jacobian coordinates) are equal +func (p *G2Jac) Equal(a *G2Jac) bool { + + if p.Z.IsZero() && a.Z.IsZero() { + return true + } + _p 
:= G2Affine{} + _p.FromJacobian(p) + + _a := G2Affine{} + _a.FromJacobian(a) + + return _p.X.Equal(&_a.X) && _p.Y.Equal(&_a.Y) +} + +// Neg computes -G +func (p *G2Jac) Neg(a *G2Jac) *G2Jac { + *p = *a + p.Y.Neg(&a.Y) + return p +} + +// SubAssign substracts two points on the curve +func (p *G2Jac) SubAssign(a *G2Jac) *G2Jac { + var tmp G2Jac + tmp.Set(a) + tmp.Y.Neg(&tmp.Y) + p.AddAssign(&tmp) + return p +} + +// AddAssign point addition in montgomery form +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl +func (p *G2Jac) AddAssign(a *G2Jac) *G2Jac { + + // p is infinity, return a + if p.Z.IsZero() { + p.Set(a) + return p + } + + // a is infinity, return p + if a.Z.IsZero() { + return p + } + + var Z1Z1, Z2Z2, U1, U2, S1, S2, H, I, J, r, V fptower.E2 + Z1Z1.Square(&a.Z) + Z2Z2.Square(&p.Z) + U1.Mul(&a.X, &Z2Z2) + U2.Mul(&p.X, &Z1Z1) + S1.Mul(&a.Y, &p.Z). + Mul(&S1, &Z2Z2) + S2.Mul(&p.Y, &a.Z). + Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U1.Equal(&U2) && S1.Equal(&S2) { + return p.DoubleAssign() + } + + H.Sub(&U2, &U1) + I.Double(&H). + Square(&I) + J.Mul(&H, &I) + r.Sub(&S2, &S1).Double(&r) + V.Mul(&U1, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + S1.Mul(&S1, &J).Double(&S1) + p.Y.Sub(&p.Y, &S1) + p.Z.Add(&p.Z, &a.Z) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &Z2Z2). + Mul(&p.Z, &H) + + return p +} + +// AddMixed point addition +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl +func (p *G2Jac) AddMixed(a *G2Affine) *G2Jac { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.Z.IsZero() { + p.X = a.X + p.Y = a.Y + p.Z.SetOne() + return p + } + + var Z1Z1, U2, S2, H, HH, I, J, r, V fptower.E2 + Z1Z1.Square(&p.Z) + U2.Mul(&a.X, &Z1Z1) + S2.Mul(&a.Y, &p.Z). 
+ Mul(&S2, &Z1Z1) + + // if p == a, we double instead + if U2.Equal(&p.X) && S2.Equal(&p.Y) { + return p.DoubleAssign() + } + + H.Sub(&U2, &p.X) + HH.Square(&H) + I.Double(&HH).Double(&I) + J.Mul(&H, &I) + r.Sub(&S2, &p.Y).Double(&r) + V.Mul(&p.X, &I) + p.X.Square(&r). + Sub(&p.X, &J). + Sub(&p.X, &V). + Sub(&p.X, &V) + J.Mul(&J, &p.Y).Double(&J) + p.Y.Sub(&V, &p.X). + Mul(&p.Y, &r) + p.Y.Sub(&p.Y, &J) + p.Z.Add(&p.Z, &H) + p.Z.Square(&p.Z). + Sub(&p.Z, &Z1Z1). + Sub(&p.Z, &HH) + + return p +} + +// Double doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) Double(q *G2Jac) *G2Jac { + p.Set(q) + p.DoubleAssign() + return p +} + +// DoubleAssign doubles a point in Jacobian coordinates +// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2007-bl +func (p *G2Jac) DoubleAssign() *G2Jac { + + var XX, YY, YYYY, ZZ, S, M, T fptower.E2 + + XX.Square(&p.X) + YY.Square(&p.Y) + YYYY.Square(&YY) + ZZ.Square(&p.Z) + S.Add(&p.X, &YY) + S.Square(&S). + Sub(&S, &XX). + Sub(&S, &YYYY). + Double(&S) + M.Double(&XX).Add(&M, &XX) + p.Z.Add(&p.Z, &p.Y). + Square(&p.Z). + Sub(&p.Z, &YY). + Sub(&p.Z, &ZZ) + T.Square(&M) + p.X = T + T.Double(&S) + p.X.Sub(&p.X, &T) + p.Y.Sub(&S, &p.X). 
+ Mul(&p.Y, &M) + YYYY.Double(&YYYY).Double(&YYYY).Double(&YYYY) + p.Y.Sub(&p.Y, &YYYY) + + return p +} + +// ScalarMultiplication computes and returns p = a*s +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) ScalarMultiplication(a *G2Jac, s *big.Int) *G2Jac { + return p.mulGLV(a, s) +} + +func (p *G2Jac) String() string { + if p.Z.IsZero() { + return "O" + } + _p := G2Affine{} + _p.FromJacobian(p) + return "E([" + _p.X.String() + "," + _p.Y.String() + "])," +} + +// FromAffine sets p = Q, p in Jacboian, Q in affine +func (p *G2Jac) FromAffine(Q *G2Affine) *G2Jac { + if Q.X.IsZero() && Q.Y.IsZero() { + p.Z.SetZero() + p.X.SetOne() + p.Y.SetOne() + return p + } + p.Z.SetOne() + p.X.Set(&Q.X) + p.Y.Set(&Q.Y) + return p +} + +// IsOnCurve returns true if p in on the curve +func (p *G2Jac) IsOnCurve() bool { + var left, right, tmp fptower.E2 + left.Square(&p.Y) + right.Square(&p.X).Mul(&right, &p.X) + tmp.Square(&p.Z). + Square(&tmp). + Mul(&tmp, &p.Z). + Mul(&tmp, &p.Z). + Mul(&tmp, &bTwistCurveCoeff) + right.Add(&right, &tmp) + return left.Equal(&right) +} + +// https://eprint.iacr.org/2021/1130.pdf, sec.4 +// psi(p) = u*P +func (p *G2Jac) IsInSubGroup() bool { + var res, tmp G2Jac + tmp.psi(p) + res.ScalarMultiplication(p, &xGen). 
+ SubAssign(&tmp) + + return res.IsOnCurve() && res.Z.IsZero() +} + +// mulWindowed 2-bits windowed exponentiation +func (p *G2Jac) mulWindowed(a *G2Jac, s *big.Int) *G2Jac { + + var res G2Jac + var ops [3]G2Jac + + res.Set(&g2Infinity) + ops[0].Set(a) + ops[1].Double(&ops[0]) + ops[2].Set(&ops[0]).AddAssign(&ops[1]) + + b := s.Bytes() + for i := range b { + w := b[i] + mask := byte(0xc0) + for j := 0; j < 4; j++ { + res.DoubleAssign().DoubleAssign() + c := (w & mask) >> (6 - 2*j) + if c != 0 { + res.AddAssign(&ops[c-1]) + } + mask = mask >> 2 + } + } + p.Set(&res) + + return p + +} + +// psi(p) = u o frob o u**-1 where u:E'->E iso from the twist to E +func (p *G2Jac) psi(a *G2Jac) *G2Jac { + p.Set(a) + p.X.Conjugate(&p.X).Mul(&p.X, &endo.u) + p.Y.Conjugate(&p.Y).Mul(&p.Y, &endo.v) + p.Z.Conjugate(&p.Z) + return p +} + +// phi assigns p to phi(a) where phi: (x,y)->(ux,y), and returns p +func (p *G2Jac) phi(a *G2Jac) *G2Jac { + p.Set(a) + p.X.MulByElement(&p.X, &thirdRootOneG2) + return p +} + +// mulGLV performs scalar multiplication using GLV +// see https://www.iacr.org/archive/crypto2001/21390189.pdf +func (p *G2Jac) mulGLV(a *G2Jac, s *big.Int) *G2Jac { + + var table [15]G2Jac + var res G2Jac + var k1, k2 fr.Element + + res.Set(&g2Infinity) + + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a + table[0].Set(a) + table[3].phi(a) + + // split the scalar, modifies +-a, phi(a) accordingly + k := ecc.SplitScalar(s, &glvBasis) + + if k[0].Sign() == -1 { + k[0].Neg(&k[0]) + table[0].Neg(&table[0]) + } + if k[1].Sign() == -1 { + k[1].Neg(&k[1]) + table[3].Neg(&table[3]) + } + + // precompute table (2 bits sliding window) + // table[b3b2b1b0-1] = b3b2*phi(a) + b1b0*a if b3b2b1b0 != 0 + table[1].Double(&table[0]) + table[2].Set(&table[1]).AddAssign(&table[0]) + table[4].Set(&table[3]).AddAssign(&table[0]) + table[5].Set(&table[3]).AddAssign(&table[1]) + table[6].Set(&table[3]).AddAssign(&table[2]) + table[7].Double(&table[3]) + table[8].Set(&table[7]).AddAssign(&table[0]) + 
table[9].Set(&table[7]).AddAssign(&table[1]) + table[10].Set(&table[7]).AddAssign(&table[2]) + table[11].Set(&table[7]).AddAssign(&table[3]) + table[12].Set(&table[11]).AddAssign(&table[0]) + table[13].Set(&table[11]).AddAssign(&table[1]) + table[14].Set(&table[11]).AddAssign(&table[2]) + + // bounds on the lattice base vectors guarantee that k1, k2 are len(r)/2 bits long max + k1.SetBigInt(&k[0]).FromMont() + k2.SetBigInt(&k[1]).FromMont() + + // loop starts from len(k1)/2 due to the bounds + for i := int(math.Ceil(fr.Limbs/2. - 1)); i >= 0; i-- { + mask := uint64(3) << 62 + for j := 0; j < 32; j++ { + res.Double(&res).Double(&res) + b1 := (k1[i] & mask) >> (62 - 2*j) + b2 := (k2[i] & mask) >> (62 - 2*j) + if b1|b2 != 0 { + s := (b2<<2 | b1) + res.AddAssign(&table[s-1]) + } + mask = mask >> 2 + } + } + + p.Set(&res) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Affine) ClearCofactor(a *G2Affine) *G2Affine { + var _p G2Jac + _p.FromAffine(a) + _p.ClearCofactor(&_p) + p.FromJacobian(&_p) + return p +} + +// ClearCofactor maps a point in curve to r-torsion +func (p *G2Jac) ClearCofactor(a *G2Jac) *G2Jac { + // https://eprint.iacr.org/2017/419.pdf, 4.1 + var xg, xxg, res, t G2Jac + xg.ScalarMultiplication(a, &xGen) + xxg.ScalarMultiplication(&xg, &xGen) + + res.Set(&xxg). + SubAssign(&xg). + SubAssign(a) + + t.Set(&xg). + SubAssign(a). 
+ psi(&t) + + res.AddAssign(&t) + + t.Double(a) + t.X.MulByElement(&t.X, &thirdRootOneG1) + + res.SubAssign(&t) + + p.Set(&res) + + return p + +} + +// ------------------------------------------------------------------------------------------------- +// Jacobian extended + +// Set sets p to the provided point +func (p *g2JacExtended) Set(a *g2JacExtended) *g2JacExtended { + p.X, p.Y, p.ZZ, p.ZZZ = a.X, a.Y, a.ZZ, a.ZZZ + return p +} + +// setInfinity sets p to O +func (p *g2JacExtended) setInfinity() *g2JacExtended { + p.X.SetOne() + p.Y.SetOne() + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p +} + +// fromJacExtended sets Q in affine coords +func (p *G2Affine) fromJacExtended(Q *g2JacExtended) *G2Affine { + if Q.ZZ.IsZero() { + p.X = fptower.E2{} + p.Y = fptower.E2{} + return p + } + p.X.Inverse(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Inverse(&Q.ZZZ).Mul(&p.Y, &Q.Y) + return p +} + +// fromJacExtended sets Q in Jacobian coords +func (p *G2Jac) fromJacExtended(Q *g2JacExtended) *G2Jac { + if Q.ZZ.IsZero() { + p.Set(&g2Infinity) + return p + } + p.X.Mul(&Q.ZZ, &Q.X).Mul(&p.X, &Q.ZZ) + p.Y.Mul(&Q.ZZZ, &Q.Y).Mul(&p.Y, &Q.ZZZ) + p.Z.Set(&Q.ZZZ) + return p +} + +// unsafeFromJacExtended sets p in jacobian coords, but don't check for infinity +func (p *G2Jac) unsafeFromJacExtended(Q *g2JacExtended) *G2Jac { + p.X.Square(&Q.ZZ).Mul(&p.X, &Q.X) + p.Y.Square(&Q.ZZZ).Mul(&p.Y, &Q.Y) + p.Z = Q.ZZZ + return p +} + +// add point in ZZ coords +// https://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s +func (p *g2JacExtended) add(q *g2JacExtended) *g2JacExtended { + //if q is infinity return p + if q.ZZ.IsZero() { + return p + } + // p is infinity, return q + if p.ZZ.IsZero() { + p.Set(q) + return p + } + + var A, B, X1ZZ2, X2ZZ1, Y1ZZZ2, Y2ZZZ1 fptower.E2 + + // p2: q, p1: p + X2ZZ1.Mul(&q.X, &p.ZZ) + X1ZZ2.Mul(&p.X, &q.ZZ) + A.Sub(&X2ZZ1, &X1ZZ2) + Y2ZZZ1.Mul(&q.Y, &p.ZZZ) + Y1ZZZ2.Mul(&p.Y, &q.ZZZ) + B.Sub(&Y2ZZZ1, &Y1ZZZ2) + + if A.IsZero() { + if 
B.IsZero() { + return p.double(q) + + } + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p + } + + var U1, U2, S1, S2, P, R, PP, PPP, Q, V fptower.E2 + U1.Mul(&p.X, &q.ZZ) + U2.Mul(&q.X, &p.ZZ) + S1.Mul(&p.Y, &q.ZZZ) + S2.Mul(&q.Y, &p.ZZZ) + P.Sub(&U2, &U1) + R.Sub(&S2, &S1) + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&U1, &PP) + V.Mul(&S1, &PPP) + + p.X.Square(&R). + Sub(&p.X, &PPP). + Sub(&p.X, &Q). + Sub(&p.X, &Q) + p.Y.Sub(&Q, &p.X). + Mul(&p.Y, &R). + Sub(&p.Y, &V) + p.ZZ.Mul(&p.ZZ, &q.ZZ). + Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &q.ZZZ). + Mul(&p.ZZZ, &PPP) + + return p +} + +// double point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) double(q *g2JacExtended) *g2JacExtended { + var U, V, W, S, XX, M fptower.E2 + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + U.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S). + Sub(&p.X, &S) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). 
+ Sub(&p.Y, &U) + p.ZZ.Mul(&V, &q.ZZ) + p.ZZZ.Mul(&W, &q.ZZZ) + + return p +} + +// subMixed same as addMixed, but will negate a.Y +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) subMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y.Neg(&a.Y) + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fptower.E2 + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Neg(&R) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleNegMixed(a) + + } + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fptower.E2 + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p + +} + +// addMixed +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s +func (p *g2JacExtended) addMixed(a *G2Affine) *g2JacExtended { + + //if a is infinity return p + if a.X.IsZero() && a.Y.IsZero() { + return p + } + // p is infinity, return a + if p.ZZ.IsZero() { + p.X = a.X + p.Y = a.Y + p.ZZ.SetOne() + p.ZZZ.SetOne() + return p + } + + var P, R fptower.E2 + + // p2: a, p1: p + P.Mul(&a.X, &p.ZZ) + P.Sub(&P, &p.X) + + R.Mul(&a.Y, &p.ZZZ) + R.Sub(&R, &p.Y) + + if P.IsZero() { + if R.IsZero() { + return p.doubleMixed(a) + + } + p.ZZ = fptower.E2{} + p.ZZZ = fptower.E2{} + return p + } + + var PP, PPP, Q, Q2, RR, X3, Y3 fptower.E2 + + PP.Square(&P) + PPP.Mul(&P, &PP) + Q.Mul(&p.X, &PP) + RR.Square(&R) + X3.Sub(&RR, &PPP) + Q2.Double(&Q) + p.X.Sub(&X3, &Q2) + Y3.Sub(&Q, &p.X).Mul(&Y3, &R) + R.Mul(&p.Y, &PPP) + p.Y.Sub(&Y3, &R) + p.ZZ.Mul(&p.ZZ, &PP) + p.ZZZ.Mul(&p.ZZZ, &PPP) + + return p 
+ +} + +// doubleNegMixed same as double, but will negate q.Y +func (p *g2JacExtended) doubleNegMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fptower.E2 + + U.Double(&q.Y) + U.Neg(&U) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Add(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// doubleMixed point in ZZ coords +// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1 +func (p *g2JacExtended) doubleMixed(q *G2Affine) *g2JacExtended { + + var U, V, W, S, XX, M, S2, L fptower.E2 + + U.Double(&q.Y) + V.Square(&U) + W.Mul(&U, &V) + S.Mul(&q.X, &V) + XX.Square(&q.X) + M.Double(&XX). + Add(&M, &XX) // -> + a, but a=0 here + S2.Double(&S) + L.Mul(&W, &q.Y) + + p.X.Square(&M). + Sub(&p.X, &S2) + p.Y.Sub(&S, &p.X). + Mul(&p.Y, &M). + Sub(&p.Y, &L) + p.ZZ.Set(&V) + p.ZZZ.Set(&W) + + return p +} + +// ------------------------------------------------------------------------------------------------- +// Homogenous projective + +// Set sets p to the provided point +func (p *g2Proj) Set(a *g2Proj) *g2Proj { + p.x, p.y, p.z = a.x, a.y, a.z + return p +} + +// Neg computes -G +func (p *g2Proj) Neg(a *g2Proj) *g2Proj { + *p = *a + p.y.Neg(&a.y) + return p +} + +// FromJacobian converts a point from Jacobian to projective coordinates +func (p *g2Proj) FromJacobian(Q *G2Jac) *g2Proj { + var buf fptower.E2 + buf.Square(&Q.Z) + + p.x.Mul(&Q.X, &Q.Z) + p.y.Set(&Q.Y) + p.z.Mul(&Q.Z, &buf) + + return p +} + +// FromAffine sets p = Q, p in homogenous projective, Q in affine +func (p *g2Proj) FromAffine(Q *G2Affine) *g2Proj { + if Q.X.IsZero() && Q.Y.IsZero() { + p.z.SetZero() + p.x.SetOne() + p.y.SetOne() + return p + } + p.z.SetOne() + p.x.Set(&Q.X) + p.y.Set(&Q.Y) + return p +} + +// BatchScalarMultiplicationG2 multiplies the same base 
(generator) by all scalars +// and return resulting points in affine coordinates +// uses a simple windowed-NAF like exponentiation algorithm +func BatchScalarMultiplicationG2(base *G2Affine, scalars []fr.Element) []G2Affine { + + // approximate cost in group ops is + // cost = 2^{c-1} + n(scalar.nbBits+nbChunks) + + nbPoints := uint64(len(scalars)) + min := ^uint64(0) + bestC := 0 + for c := 2; c < 18; c++ { + cost := uint64(1 << (c - 1)) + nbChunks := uint64(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + cost += nbPoints * ((fr.Limbs * 64) + nbChunks) + if cost < min { + min = cost + bestC = c + } + } + c := uint64(bestC) // window size + nbChunks := int(fr.Limbs * 64 / c) + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + // precompute all powers of base for our window + // note here that if performance is critical, we can implement as in the msmX methods + // this allocation to be on the stack + baseTable := make([]G2Jac, (1 << (c - 1))) + baseTable[0].Set(&g2Infinity) + baseTable[0].AddMixed(base) + for i := 1; i < len(baseTable); i++ { + baseTable[i] = baseTable[i-1] + baseTable[i].AddMixed(base) + } + + pScalars, _ := partitionScalars(scalars, c, false, runtime.NumCPU()) + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := 0; chunk < nbChunks; chunk++ { + jc := uint64(uint64(chunk) * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = (64%c) != 0 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + toReturn := make([]G2Affine, len(scalars)) + + // for each digit, take value in the base table, double it c time, voila. 
+ parallel.Execute(len(pScalars), func(start, end int) { + var p G2Jac + for i := start; i < end; i++ { + p.Set(&g2Infinity) + for chunk := nbChunks - 1; chunk >= 0; chunk-- { + s := selectors[chunk] + if chunk != nbChunks-1 { + for j := uint64(0); j < c; j++ { + p.DoubleAssign() + } + } + + bits := (pScalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (pScalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + p.AddAssign(&baseTable[bits-1]) + } else { + // sub + t := baseTable[bits & ^msbWindow] + t.Neg(&t) + p.AddAssign(&t) + } + } + + // set our result point + toReturn[i].FromJacobian(&p) + + } + }) + return toReturn +} diff --git a/ecc/bls12-378/g2_test.go b/ecc/bls12-378/g2_test.go new file mode 100644 index 000000000..f813c2b39 --- /dev/null +++ b/ecc/bls12-378/g2_test.go @@ -0,0 +1,685 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestG2AffineEndomorphism(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] check that phi(P) = lambdaGLV * P", prop.ForAll( + func(a fptower.E2) bool { + var p, res1, res2 G2Jac + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) + res1.phi(&p) + res2.mulWindowed(&p, &lambdaGLV) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] check that phi^2(P) + phi(P) + P = 0", prop.ForAll( + func(a fptower.E2) bool { + var p, res, tmp G2Jac + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) + tmp.phi(&p) + res.phi(&tmp). + AddAssign(&tmp). 
+ AddAssign(&p) + + return res.Z.IsZero() + }, + GenE2(), + )) + + properties.Property("[BLS12-378] check that psi^2(P) = -phi(P)", prop.ForAll( + func(a fptower.E2) bool { + var p, res1, res2 G2Jac + g := MapToCurveG2Svdw(a) + p.FromAffine(&g) + res1.psi(&p).psi(&res1).Neg(&res1) + res2.Set(&p) + res2.X.MulByElement(&res2.X, &thirdRootOneG1) + + return p.IsInSubGroup() && res1.Equal(&res2) + }, + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestMapToCurveG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Svsw mapping should output point on the curve", prop.ForAll( + func(a fptower.E2) bool { + g := MapToCurveG2Svdw(a) + return g.IsInSubGroup() + }, + GenE2(), + )) + + properties.Property("[G2] Svsw mapping should be deterministic", prop.ForAll( + func(a fptower.E2) bool { + g1 := MapToCurveG2Svdw(a) + g2 := MapToCurveG2Svdw(a) + return g1.Equal(&g2) + }, + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineIsOnCurve(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] g2Gen (affine) should be on the curve", prop.ForAll( + func(a fptower.E2) bool { + var op1, op2 G2Affine + op1.FromJacobian(&g2Gen) + op2.FromJacobian(&g2Gen) + op2.Y.Mul(&op2.Y, &a) + return op1.IsOnCurve() && !op2.IsOnCurve() + }, + GenE2(), + )) + + properties.Property("[BLS12-378] g2Gen (Jacobian) should be on the curve", prop.ForAll( + func(a fptower.E2) bool { + var op1, op2, op3 G2Jac + op1.Set(&g2Gen) + op3.Set(&g2Gen) + + op2 = fuzzJacobianG2Affine(&g2Gen, a) + op3.Y.Mul(&op3.Y, &a) + return op1.IsOnCurve() && op2.IsOnCurve() && !op3.IsOnCurve() + }, + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func 
TestG2AffineConversions(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Affine representation should be independent of the Jacobian representative", prop.ForAll( + func(a fptower.E2) bool { + g := fuzzJacobianG2Affine(&g2Gen, a) + var op1 G2Affine + op1.FromJacobian(&g) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] Affine representation should be independent of a Extended Jacobian representative", prop.ForAll( + func(a fptower.E2) bool { + var g g2JacExtended + g.X.Set(&g2Gen.X) + g.Y.Set(&g2Gen.Y) + g.ZZ.Set(&g2Gen.Z) + g.ZZZ.Set(&g2Gen.Z) + gfuzz := fuzzExtendedJacobianG2Affine(&g, a) + + var op1 G2Affine + op1.fromJacExtended(&gfuzz) + return op1.X.Equal(&g2Gen.X) && op1.Y.Equal(&g2Gen.Y) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] Jacobian representation should be the same as the affine representative", prop.ForAll( + func(a fptower.E2) bool { + var g G2Jac + var op1 G2Affine + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + + var one fptower.E2 + one.SetOne() + + g.FromAffine(&op1) + + return g.X.Equal(&g2Gen.X) && g.Y.Equal(&g2Gen.Y) && g.Z.Equal(&one) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] Converting affine symbol for infinity to Jacobian should output correct infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Affine + g.X.SetZero() + g.Y.SetZero() + var op1 G2Jac + op1.FromAffine(&g) + var one, zero fptower.E2 + one.SetOne() + return op1.X.Equal(&one) && op1.Y.Equal(&one) && op1.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to affine should output infinity symbol in Affine", prop.ForAll( + func() bool { + var g G2Affine + var op1 g2JacExtended + var zero fptower.E2 + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return 
g.X.Equal(&zero) && g.Y.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] Converting infinity in extended Jacobian to Jacobian should output infinity in Jacobian", prop.ForAll( + func() bool { + var g G2Jac + var op1 g2JacExtended + var zero, one fptower.E2 + one.SetOne() + op1.X.Set(&g2Gen.X) + op1.Y.Set(&g2Gen.Y) + g.fromJacExtended(&op1) + return g.X.Equal(&one) && g.Y.Equal(&one) && g.Z.Equal(&zero) + }, + )) + + properties.Property("[BLS12-378] [Jacobian] Two representatives of the same class should be equal", prop.ForAll( + func(a, b fptower.E2) bool { + op1 := fuzzJacobianG2Affine(&g2Gen, a) + op2 := fuzzJacobianG2Affine(&g2Gen, b) + return op1.Equal(&op2) + }, + GenE2(), + GenE2(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + properties.Property("[BLS12-378] [Jacobian] Add should call double when having adding the same point", prop.ForAll( + func(a, b fptower.E2) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop2 := fuzzJacobianG2Affine(&g2Gen, b) + var op1, op2 G2Jac + op1.Set(&fop1).AddAssign(&fop2) + op2.Double(&fop2) + return op1.Equal(&op2) + }, + GenE2(), + GenE2(), + )) + + properties.Property("[BLS12-378] [Jacobian] Adding the opposite of a point to itself should output inf", prop.ForAll( + func(a, b fptower.E2) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop2 := fuzzJacobianG2Affine(&g2Gen, b) + fop2.Neg(&fop2) + fop1.AddAssign(&fop2) + return fop1.Equal(&g2Infinity) + }, + GenE2(), + GenE2(), + )) + + properties.Property("[BLS12-378] [Jacobian] Adding the inf to a point should not modify the point", prop.ForAll( + func(a fptower.E2) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop1.AddAssign(&g2Infinity) + var op2 G2Jac + op2.Set(&g2Infinity) + op2.AddAssign(&g2Gen) + return fop1.Equal(&g2Gen) 
&& op2.Equal(&g2Gen) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] [Jacobian Extended] addMixed (-G) should equal subMixed(G)", prop.ForAll( + func(a fptower.E2) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + var p1, p1Neg G2Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g2JacExtended + o1.addMixed(&p1Neg) + o2.subMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] [Jacobian Extended] doubleMixed (-G) should equal doubleNegMixed(G)", prop.ForAll( + func(a fptower.E2) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + var p1, p1Neg G2Affine + p1.FromJacobian(&fop1) + p1Neg = p1 + p1Neg.Y.Neg(&p1Neg.Y) + var o1, o2 g2JacExtended + o1.doubleMixed(&p1Neg) + o2.doubleNegMixed(&p1) + + return o1.X.Equal(&o2.X) && + o1.Y.Equal(&o2.Y) && + o1.ZZ.Equal(&o2.ZZ) && + o1.ZZZ.Equal(&o2.ZZZ) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] [Jacobian] Addmix the negation to itself should output 0", prop.ForAll( + func(a fptower.E2) bool { + fop1 := fuzzJacobianG2Affine(&g2Gen, a) + fop1.Neg(&fop1) + var op2 G2Affine + op2.FromJacobian(&g2Gen) + fop1.AddMixed(&op2) + return fop1.Equal(&g2Infinity) + }, + GenE2(), + )) + + properties.Property("[BLS12-378] scalar multiplication (double and add) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G2Jac + g.mulGLV(&g2Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G2Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.mulWindowed(&g2Gen, &rminusone) + gneg.Neg(&g2Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.mulWindowed(&g2Gen, &scalar) + op2.mulWindowed(&g2Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + 
properties.Property("[BLS12-378] psi should map points from E' to itself", prop.ForAll( + func() bool { + var a G2Jac + a.psi(&g2Gen) + return a.IsOnCurve() && !a.Equal(&g2Gen) + }, + )) + + properties.Property("[BLS12-378] scalar multiplication (GLV) should depend only on the scalar mod r", prop.ForAll( + func(s fr.Element) bool { + + r := fr.Modulus() + var g G2Jac + g.mulGLV(&g2Gen, r) + + var scalar, blindedScalar, rminusone big.Int + var op1, op2, op3, gneg G2Jac + rminusone.SetUint64(1).Sub(r, &rminusone) + op3.ScalarMultiplication(&g2Gen, &rminusone) + gneg.Neg(&g2Gen) + s.ToBigIntRegular(&scalar) + blindedScalar.Mul(&scalar, r).Add(&blindedScalar, &scalar) + op1.ScalarMultiplication(&g2Gen, &scalar) + op2.ScalarMultiplication(&g2Gen, &blindedScalar) + + return op1.Equal(&op2) && g.Equal(&g2Infinity) && !op1.Equal(&g2Infinity) && gneg.Equal(&op3) + + }, + genScalar, + )) + + properties.Property("[BLS12-378] GLV and Double and Add should output the same result", prop.ForAll( + func(s fr.Element) bool { + + var r big.Int + var op1, op2 G2Jac + s.ToBigIntRegular(&r) + op1.mulWindowed(&g2Gen, &r) + op2.mulGLV(&g2Gen, &r) + return op1.Equal(&op2) && !op1.Equal(&g2Infinity) + + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineCofactorCleaning(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + properties.Property("[BLS12-378] Clearing the cofactor of a random point should set it in the r-torsion", prop.ForAll( + func() bool { + var a, x, b fptower.E2 + a.SetRandom() + + x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff) + for x.Legendre() != 1 { + a.SetRandom() + x.Square(&a).Mul(&x, &a).Add(&x, &bTwistCurveCoeff) + } + + b.Sqrt(&x) + var point, pointCleared, infinity G2Jac + point.X.Set(&a) + point.Y.Set(&b) + point.Z.SetOne() + pointCleared.ClearCofactor(&point) + infinity.Set(&g2Infinity) + return 
point.IsOnCurve() && pointCleared.IsInSubGroup() && !pointCleared.Equal(&infinity) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestG2AffineBatchScalarMultiplication(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 10 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 10 + + properties.Property("[BLS12-378] BatchScalarMultiplication should be consistant with individual scalar multiplications", prop.ForAll( + func(mixer fr.Element) bool { + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + result := BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:]) + + if len(result) != len(sampleScalars) { + return false + } + + for i := 0; i < len(result); i++ { + var expectedJac G2Jac + var expected G2Affine + var b big.Int + expectedJac.mulGLV(&g2Gen, sampleScalars[i].ToBigInt(&b)) + expected.FromJacobian(&expectedJac) + if !result[i].Equal(&expected) { + return false + } + } + return true + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkG2JacIsInSubGroup(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.IsInSubGroup() + } + +} + +func BenchmarkG2AffineBatchScalarMul(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = 15 + const nbSamples = 1 << pow + + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). 
+ Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + _ = BatchScalarMultiplicationG2(&g2GenAff, sampleScalars[:using]) + } + }) + } +} + +func BenchmarkG2JacScalarMul(b *testing.B) { + + var scalar big.Int + r := fr.Modulus() + scalar.SetString("5243587517512619047944770508185965837690552500527637822603658699938581184513", 10) + scalar.Add(&scalar, r) + + var doubleAndAdd G2Jac + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.mulWindowed(&g2Gen, &scalar) + } + }) + + var glv G2Jac + b.Run("GLV", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + glv.mulGLV(&g2Gen, &scalar) + } + }) + +} + +func BenchmarkG2AffineCofactorClearing(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + for i := 0; i < b.N; i++ { + a.ClearCofactor(&a) + } +} + +func BenchmarkG2JacAdd(b *testing.B) { + var a G2Jac + a.Double(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddAssign(&g2Gen) + } +} + +func BenchmarkG2JacAddMixed(b *testing.B) { + var a G2Jac + a.Double(&g2Gen) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.AddMixed(&c) + } + +} + +func BenchmarkG2JacDouble(b *testing.B) { + var a G2Jac + a.Set(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.DoubleAssign() + } + +} + +func BenchmarkG2JacExtAddMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.addMixed(&c) + } +} + +func BenchmarkG2JacExtSubMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.subMixed(&c) + } +} + +func BenchmarkG2JacExtDoubleMixed(b *testing.B) { + var a g2JacExtended + 
a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleMixed(&c) + } +} + +func BenchmarkG2JacExtDoubleNegMixed(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + var c G2Affine + c.FromJacobian(&g2Gen) + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.doubleNegMixed(&c) + } +} + +func BenchmarkG2JacExtAdd(b *testing.B) { + var a, c g2JacExtended + a.doubleMixed(&g2GenAff) + c.double(&a) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.add(&c) + } +} + +func BenchmarkG2JacExtDouble(b *testing.B) { + var a g2JacExtended + a.doubleMixed(&g2GenAff) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.double(&a) + } +} + +func fuzzJacobianG2Affine(p *G2Jac, f fptower.E2) G2Jac { + var res G2Jac + res.X.Mul(&p.X, &f).Mul(&res.X, &f) + res.Y.Mul(&p.Y, &f).Mul(&res.Y, &f).Mul(&res.Y, &f) + res.Z.Mul(&p.Z, &f) + return res +} + +func fuzzExtendedJacobianG2Affine(p *g2JacExtended, f fptower.E2) g2JacExtended { + var res g2JacExtended + var ff, fff fptower.E2 + ff.Square(&f) + fff.Mul(&ff, &f) + res.X.Mul(&p.X, &ff) + res.Y.Mul(&p.Y, &fff) + res.ZZ.Mul(&p.ZZ, &ff) + res.ZZZ.Mul(&p.ZZZ, &fff) + return res +} diff --git a/ecc/bls12-378/hash_to_curve.go b/ecc/bls12-378/hash_to_curve.go new file mode 100644 index 000000000..74326a85a --- /dev/null +++ b/ecc/bls12-378/hash_to_curve.go @@ -0,0 +1,276 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bls12378 + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +// hashToFp hashes msg to count prime field elements. +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-5.2 +func hashToFp(msg, dst []byte, count int) ([]fp.Element, error) { + + // 128 bits of security + // L = ceil((ceil(log2(p)) + k) / 8), where k is the security parameter = 128 + L := 64 + + lenInBytes := count * L + pseudoRandomBytes, err := ecc.ExpandMsgXmd(msg, dst, lenInBytes) + if err != nil { + return nil, err + } + + res := make([]fp.Element, count) + for i := 0; i < count; i++ { + res[i].SetBytes(pseudoRandomBytes[i*L : (i+1)*L]) + } + return res, nil +} + +// returns false if u>-u when seen as a bigInt +func sign0(u fp.Element) bool { + var a, b big.Int + u.ToBigIntRegular(&a) + u.Neg(&u) + u.ToBigIntRegular(&b) + return a.Cmp(&b) <= 0 +} + +// ---------------------------------------------------------------------------------------- +// G1Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG1(u fp.Element) G1Affine { + + var res G1Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fp.Element + z.SetOne() + c1.SetString("2") + c2.SetString("302624103037653085866624240790900480369923845885462456876760372017370467951700652388141901174418655585487141470208") + c3.SetString("287149757441151686413597772668332250774959033155223733277500818890482204710560507876289457345990628536787382328589") + c4.SetString("403498804050204114488832321054533973826565127847283275835680496023160623935600869850855868232558207447316188626942") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y 
fp.Element + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + tv4.Mul(&tv4, &c3) + x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bCurveCoeff) + e2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if e2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u) && sign0(y) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG1Svdw(t fp.Element) G1Affine { + res := svdwMapG1(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + t, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + res = MapToCurveG1Svdw(t[0]) + return res, nil +} + +// HashToCurveG1Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG1Svdw(msg, dst []byte) (G1Affine, error) { + var res G1Affine + u, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + Q0 := MapToCurveG1Svdw(u[0]) + Q1 := MapToCurveG1Svdw(u[1]) + var _Q0, 
_Q1, _res G1Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} + +// ---------------------------------------------------------------------------------------- +// G2Affine + +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-4.1 +// Shallue and van de Woestijne method, works for any elliptic curve in Weierstrass curve +func svdwMapG2(u fptower.E2) G2Affine { + + var res G2Affine + + // constants + // sage script to find z: https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#appendix-E.1 + var z, c1, c2, c3, c4 fptower.E2 + z.A0.SetOne() + z.A1.SetOne() + c1.A0.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940403") + c1.A1.SetString("605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940416") + c2.A0.SetString("302624103037653085866624240790900480369923845885462456876760372017370467951700652388141901174418655585487141470208") + c2.A1.SetString("302624103037653085866624240790900480369923845885462456876760372017370467951700652388141901174418655585487141470208") + c3.A0.SetString("296552843788751288906244499216725356684281694271241895700730864223961612014909088554048735457137528455181151573749") + c3.A1.SetString("181388265705333345538985517067130917207305732282979825233670477511990909086507141331244586890249042878909613862256") + c4.A0.SetString("224166002250113396938240178363629985459202848804046264353155831123978124408667149917142149018087893026286771459412") + c4.A1.SetString("313832403150158755713536249709081979642883988325664770094418163573569374172134009883999008625323050236801480043178") + + var tv1, tv2, tv3, tv4, one, x1, gx1, x2, gx2, x3, x, gx, y fptower.E2 + one.SetOne() + tv1.Square(&u).Mul(&tv1, &c1) + tv2.Add(&one, &tv1) + tv1.Sub(&one, &tv1) + tv3.Mul(&tv2, &tv1).Inverse(&tv3) + tv4.Mul(&u, &tv1) + tv4.Mul(&tv4, &tv3) + 
tv4.Mul(&tv4, &c3) + x1.Sub(&c2, &tv4) + gx1.Square(&x1) + // 12. gx1 = gx1 + A + gx1.Mul(&gx1, &x1) + gx1.Add(&gx1, &bTwistCurveCoeff) + e1 := gx1.Legendre() + x2.Add(&c2, &tv4) + gx2.Square(&x2) + // 18. gx2 = gx2 + A + gx2.Mul(&gx2, &x2) + gx2.Add(&gx2, &bTwistCurveCoeff) + e2 := gx2.Legendre() - e1 // 2 if is_square(gx2) AND NOT e1 + x3.Square(&tv2) + x3.Mul(&x3, &tv3) + x3.Square(&x3) + x3.Mul(&x3, &c4) + x3.Add(&x3, &z) + if e1 == 1 { + x.Set(&x1) + } else { + x.Set(&x3) + } + if e2 == 2 { + x.Set(&x2) + } + gx.Square(&x) + // gx = gx + A + gx.Mul(&gx, &x) + gx.Add(&gx, &bTwistCurveCoeff) + y.Sqrt(&gx) + e3 := sign0(u.A0) && sign0(y.A0) + if !e3 { + y.Neg(&y) + } + res.X.Set(&x) + res.Y.Set(&y) + + return res +} + +// MapToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.1 +func MapToCurveG2Svdw(t fptower.E2) G2Affine { + res := svdwMapG2(t) + res.ClearCofactor(&res) + return res +} + +// EncodeToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-2.2.2 +func EncodeToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + _t, err := hashToFp(msg, dst, 2) + if err != nil { + return res, err + } + var t fptower.E2 + t.A0.Set(&_t[0]) + t.A1.Set(&_t[1]) + res = MapToCurveG2Svdw(t) + return res, nil +} + +// HashToCurveG2Svdw maps an fp.Element to a point on the curve using the Shallue and van de Woestijne map +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG2Svdw(msg, dst []byte) (G2Affine, error) { + var res G2Affine + u, err := hashToFp(msg, dst, 4) + if err != nil { + return res, err + } + var u0, u1 fptower.E2 + u0.A0.Set(&u[0]) + u0.A1.Set(&u[1]) + u1.A0.Set(&u[2]) + u1.A1.Set(&u[3]) + Q0 := MapToCurveG2Svdw(u0) + Q1 := MapToCurveG2Svdw(u1) + var _Q0, _Q1, _res 
G2Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1) + _res.Set(&_Q1).AddAssign(&_Q0) + res.FromJacobian(&_res) + return res, nil +} diff --git a/ecc/bls12-378/internal/fptower/asm.go b/ecc/bls12-378/internal/fptower/asm.go new file mode 100644 index 000000000..0ec192019 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/asm.go @@ -0,0 +1,28 @@ +//go:build !noadx +// +build !noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import "golang.org/x/sys/cpu" + +// supportAdx will be set only on amd64 that has MULX and ADDX instructions +var ( + supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 + _ = supportAdx // used in asm +) diff --git a/ecc/bls12-378/internal/fptower/asm_noadx.go b/ecc/bls12-378/internal/fptower/asm_noadx.go new file mode 100644 index 000000000..6a09c11c4 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/asm_noadx.go @@ -0,0 +1,25 @@ +//go:build noadx +// +build noadx + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag +// certain errors (like fatal error: missing stackmap) +// this ensures we test all asm path. +var supportAdx = false diff --git a/ecc/bls12-378/internal/fptower/e12.go b/ecc/bls12-378/internal/fptower/e12.go new file mode 100644 index 000000000..aea14150d --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e12.go @@ -0,0 +1,561 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "encoding/binary" + "errors" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "math/big" +) + +// E12 is a degree two finite field extension of fp6 +type E12 struct { + C0, C1 E6 +} + +// Equal returns true if z equals x, fasle otherwise +func (z *E12) Equal(x *E12) bool { + return z.C0.Equal(&x.C0) && z.C1.Equal(&x.C1) +} + +// String puts E12 in string form +func (z *E12) String() string { + return (z.C0.String() + "+(" + z.C1.String() + ")*w") +} + +// SetString sets a E12 from string +func (z *E12) SetString(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11 string) *E12 { + z.C0.SetString(s0, s1, s2, s3, s4, s5) + z.C1.SetString(s6, s7, s8, s9, s10, s11) + return z +} + +// Set copies x into z and returns z +func (z *E12) Set(x *E12) *E12 { + z.C0 = x.C0 + z.C1 = x.C1 + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E12) SetOne() *E12 { + *z = E12{} + z.C0.B0.A0.SetOne() + return z +} + +// ToMont converts to Mont form +func (z *E12) ToMont() *E12 { + z.C0.ToMont() + z.C1.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E12) FromMont() *E12 { + z.C0.FromMont() + z.C1.FromMont() + return z +} + +// Add set z=x+y in E12 and return z +func (z *E12) Add(x, y *E12) *E12 { + z.C0.Add(&x.C0, &y.C0) + z.C1.Add(&x.C1, &y.C1) + return z +} + +// Sub sets z to x sub y and return z +func (z *E12) Sub(x, y *E12) *E12 { + z.C0.Sub(&x.C0, &y.C0) + z.C1.Sub(&x.C1, &y.C1) + return z +} + +// Double sets z=2*x and returns z +func (z *E12) Double(x *E12) *E12 { + z.C0.Double(&x.C0) + z.C1.Double(&x.C1) + return z +} + +// SetRandom used only in tests +func (z *E12) SetRandom() (*E12, error) { + if _, err := z.C0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.C1.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// Mul set z=x*y in E12 and return z +func (z *E12) Mul(x, y *E12) *E12 { + var 
a, b, c E6 + a.Add(&x.C0, &x.C1) + b.Add(&y.C0, &y.C1) + a.Mul(&a, &b) + b.Mul(&x.C0, &y.C0) + c.Mul(&x.C1, &y.C1) + z.C1.Sub(&a, &b).Sub(&z.C1, &c) + z.C0.MulByNonResidue(&c).Add(&z.C0, &b) + return z +} + +// Square set z=x*x in E12 and return z +func (z *E12) Square(x *E12) *E12 { + + //Algorithm 22 from https://eprint.iacr.org/2010/354.pdf + var c0, c2, c3 E6 + c0.Sub(&x.C0, &x.C1) + c3.MulByNonResidue(&x.C1).Neg(&c3).Add(&x.C0, &c3) + c2.Mul(&x.C0, &x.C1) + c0.Mul(&c0, &c3).Add(&c0, &c2) + z.C1.Double(&c2) + c2.MulByNonResidue(&c2) + z.C0.Add(&c0, &c2) + + return z +} + +// Karabina's compressed cyclotomic square +// https://eprint.iacr.org/2010/542.pdf +// Th. 3.2 with minor modifications to fit our tower +func (z *E12) CyclotomicSquareCompressed(x *E12) *E12 { + + var t [7]E2 + + // t0 = g1^2 + t[0].Square(&x.C0.B1) + // t1 = g5^2 + t[1].Square(&x.C1.B2) + // t5 = g1 + g5 + t[5].Add(&x.C0.B1, &x.C1.B2) + // t2 = (g1 + g5)^2 + t[2].Square(&t[5]) + + // t3 = g1^2 + g5^2 + t[3].Add(&t[0], &t[1]) + // t5 = 2 * g1 * g5 + t[5].Sub(&t[2], &t[3]) + + // t6 = g3 + g2 + t[6].Add(&x.C1.B0, &x.C0.B2) + // t3 = (g3 + g2)^2 + t[3].Square(&t[6]) + // t2 = g3^2 + t[2].Square(&x.C1.B0) + + // t6 = 2 * nr * g1 * g5 + t[6].MulByNonResidue(&t[5]) + // t5 = 4 * nr * g1 * g5 + 2 * g3 + t[5].Add(&t[6], &x.C1.B0). 
+ Double(&t[5]) + // z3 = 6 * nr * g1 * g5 + 2 * g3 + z.C1.B0.Add(&t[5], &t[6]) + + // t4 = nr * g5^2 + t[4].MulByNonResidue(&t[1]) + // t5 = nr * g5^2 + g1^2 + t[5].Add(&t[0], &t[4]) + // t6 = nr * g5^2 + g1^2 - g2 + t[6].Sub(&t[5], &x.C0.B2) + + // t1 = g2^2 + t[1].Square(&x.C0.B2) + + // t6 = 2 * nr * g5^2 + 2 * g1^2 - 2*g2 + t[6].Double(&t[6]) + // z2 = 3 * nr * g5^2 + 3 * g1^2 - 2*g2 + z.C0.B2.Add(&t[6], &t[5]) + + // t4 = nr * g2^2 + t[4].MulByNonResidue(&t[1]) + // t5 = g3^2 + nr * g2^2 + t[5].Add(&t[2], &t[4]) + // t6 = g3^2 + nr * g2^2 - g1 + t[6].Sub(&t[5], &x.C0.B1) + // t6 = 2 * g3^2 + 2 * nr * g2^2 - 2 * g1 + t[6].Double(&t[6]) + // z1 = 3 * g3^2 + 3 * nr * g2^2 - 2 * g1 + z.C0.B1.Add(&t[6], &t[5]) + + // t0 = g2^2 + g3^2 + t[0].Add(&t[2], &t[1]) + // t5 = 2 * g3 * g2 + t[5].Sub(&t[3], &t[0]) + // t6 = 2 * g3 * g2 + g5 + t[6].Add(&t[5], &x.C1.B2) + // t6 = 4 * g3 * g2 + 2 * g5 + t[6].Double(&t[6]) + // z5 = 6 * g3 * g2 + 2 * g5 + z.C1.B2.Add(&t[5], &t[6]) + + return z +} + +// Decompress Karabina's cyclotomic square result +func (z *E12) Decompress(x *E12) *E12 { + + var t [3]E2 + var one E2 + one.SetOne() + + // t0 = g1^2 + t[0].Square(&x.C0.B1) + // t1 = 3 * g1^2 - 2 * g2 + t[1].Sub(&t[0], &x.C0.B2). + Double(&t[1]). + Add(&t[1], &t[0]) + // t0 = E * g5^2 + t1 + t[2].Square(&x.C1.B2) + t[0].MulByNonResidue(&t[2]). + Add(&t[0], &t[1]) + // t1 = 1/(4 * g3) + t[1].Double(&x.C1.B0). + Double(&t[1]). + Inverse(&t[1]) // costly + // z4 = g4 + z.C1.B1.Mul(&t[0], &t[1]) + + // t1 = g2 * g1 + t[1].Mul(&x.C0.B2, &x.C0.B1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t[2].Square(&z.C1.B1). + Sub(&t[2], &t[1]). + Double(&t[2]). + Sub(&t[2], &t[1]) + // t1 = g3 * g5 + t[1].Mul(&x.C1.B0, &x.C1.B2) + // c_0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t[2].Add(&t[2], &t[1]) + z.C0.B0.MulByNonResidue(&t[2]). 
+ Add(&z.C0.B0, &one) + + z.C0.B1.Set(&x.C0.B1) + z.C0.B2.Set(&x.C0.B2) + z.C1.B0.Set(&x.C1.B0) + z.C1.B2.Set(&x.C1.B2) + + return z +} + +// BatchDecompress multiple Karabina's cyclotomic square results +func BatchDecompress(x []E12) []E12 { + + n := len(x) + if n == 0 { + return x + } + + t0 := make([]E2, n) + t1 := make([]E2, n) + t2 := make([]E2, n) + + var one E2 + one.SetOne() + + for i := 0; i < n; i++ { + // t0 = g1^2 + t0[i].Square(&x[i].C0.B1) + // t1 = 3 * g1^2 - 2 * g2 + t1[i].Sub(&t0[i], &x[i].C0.B2). + Double(&t1[i]). + Add(&t1[i], &t0[i]) + // t0 = E * g5^2 + t1 + t2[i].Square(&x[i].C1.B2) + t0[i].MulByNonResidue(&t2[i]). + Add(&t0[i], &t1[i]) + // t1 = 4 * g3 + t1[i].Double(&x[i].C1.B0). + Double(&t1[i]) + } + + t1 = BatchInvert(t1) // costs 1 inverse + + for i := 0; i < n; i++ { + // z4 = g4 + x[i].C1.B1.Mul(&t0[i], &t1[i]) + + // t1 = g2 * g1 + t1[i].Mul(&x[i].C0.B2, &x[i].C0.B1) + // t2 = 2 * g4^2 - 3 * g2 * g1 + t2[i].Square(&x[i].C1.B1) + t2[i].Sub(&t2[i], &t1[i]) + t2[i].Double(&t2[i]) + t2[i].Sub(&t2[i], &t1[i]) + + // t1 = g3 * g5 + t1[i].Mul(&x[i].C1.B0, &x[i].C1.B2) + // z0 = E * (2 * g4^2 + g3 * g5 - 3 * g2 * g1) + 1 + t2[i].Add(&t2[i], &t1[i]) + x[i].C0.B0.MulByNonResidue(&t2[i]). 
+ Add(&x[i].C0.B0, &one) + } + + return x +} + +// Granger-Scott's cyclotomic square +// https://eprint.iacr.org/2009/565.pdf, 3.2 +func (z *E12) CyclotomicSquare(x *E12) *E12 { + + // x=(x0,x1,x2,x3,x4,x5,x6,x7) in E2^6 + // cyclosquare(x)=(3*x4^2*u + 3*x0^2 - 2*x0, + // 3*x2^2*u + 3*x3^2 - 2*x1, + // 3*x5^2*u + 3*x1^2 - 2*x2, + // 6*x1*x5*u + 2*x3, + // 6*x0*x4 + 2*x4, + // 6*x2*x3 + 2*x5) + + var t [9]E2 + + t[0].Square(&x.C1.B1) + t[1].Square(&x.C0.B0) + t[6].Add(&x.C1.B1, &x.C0.B0).Square(&t[6]).Sub(&t[6], &t[0]).Sub(&t[6], &t[1]) // 2*x4*x0 + t[2].Square(&x.C0.B2) + t[3].Square(&x.C1.B0) + t[7].Add(&x.C0.B2, &x.C1.B0).Square(&t[7]).Sub(&t[7], &t[2]).Sub(&t[7], &t[3]) // 2*x2*x3 + t[4].Square(&x.C1.B2) + t[5].Square(&x.C0.B1) + t[8].Add(&x.C1.B2, &x.C0.B1).Square(&t[8]).Sub(&t[8], &t[4]).Sub(&t[8], &t[5]).MulByNonResidue(&t[8]) // 2*x5*x1*u + + t[0].MulByNonResidue(&t[0]).Add(&t[0], &t[1]) // x4^2*u + x0^2 + t[2].MulByNonResidue(&t[2]).Add(&t[2], &t[3]) // x2^2*u + x3^2 + t[4].MulByNonResidue(&t[4]).Add(&t[4], &t[5]) // x5^2*u + x1^2 + + z.C0.B0.Sub(&t[0], &x.C0.B0).Double(&z.C0.B0).Add(&z.C0.B0, &t[0]) + z.C0.B1.Sub(&t[2], &x.C0.B1).Double(&z.C0.B1).Add(&z.C0.B1, &t[2]) + z.C0.B2.Sub(&t[4], &x.C0.B2).Double(&z.C0.B2).Add(&z.C0.B2, &t[4]) + + z.C1.B0.Add(&t[8], &x.C1.B0).Double(&z.C1.B0).Add(&z.C1.B0, &t[8]) + z.C1.B1.Add(&t[6], &x.C1.B1).Double(&z.C1.B1).Add(&z.C1.B1, &t[6]) + z.C1.B2.Add(&t[7], &x.C1.B2).Double(&z.C1.B2).Add(&z.C1.B2, &t[7]) + + return z +} + +// Inverse set z to the inverse of x in E12 and return z +func (z *E12) Inverse(x *E12) *E12 { + // Algorithm 23 from https://eprint.iacr.org/2010/354.pdf + + var t0, t1, tmp E6 + t0.Square(&x.C0) + t1.Square(&x.C1) + tmp.MulByNonResidue(&t1) + t0.Sub(&t0, &tmp) + t1.Inverse(&t0) + z.C0.Mul(&x.C0, &t1) + z.C1.Mul(&x.C1, &t1).Neg(&z.C1) + + return z +} + +// Exp sets z=x**e and returns it +func (z *E12) Exp(x *E12, e big.Int) *E12 { + var res E12 + res.SetOne() + b := e.Bytes() + for i := range b { + w 
:= b[i] + mask := byte(0x80) + for j := 7; j >= 0; j-- { + res.Square(&res) + if (w&mask)>>j != 0 { + res.Mul(&res, x) + } + mask = mask >> 1 + } + } + z.Set(&res) + return z +} + +// InverseUnitary inverse a unitary element +func (z *E12) InverseUnitary(x *E12) *E12 { + return z.Conjugate(x) +} + +// Conjugate set z to x conjugated and return z +func (z *E12) Conjugate(x *E12) *E12 { + *z = *x + z.C1.Neg(&z.C1) + return z +} + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = 48 * 12 + +// Marshal converts z to a byte slice +func (z *E12) Marshal() []byte { + b := z.Bytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (z *E12) Unmarshal(buf []byte) error { + return z.SetBytes(buf) +} + +// Bytes returns the regular (non montgomery) value +// of z as a big-endian byte array. +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... +func (z *E12) Bytes() (r [SizeOfGT]byte) { + _z := *z + _z.FromMont() + binary.BigEndian.PutUint64(r[568:576], _z.C0.B0.A0[0]) + binary.BigEndian.PutUint64(r[560:568], _z.C0.B0.A0[1]) + binary.BigEndian.PutUint64(r[552:560], _z.C0.B0.A0[2]) + binary.BigEndian.PutUint64(r[544:552], _z.C0.B0.A0[3]) + binary.BigEndian.PutUint64(r[536:544], _z.C0.B0.A0[4]) + binary.BigEndian.PutUint64(r[528:536], _z.C0.B0.A0[5]) + + binary.BigEndian.PutUint64(r[520:528], _z.C0.B0.A1[0]) + binary.BigEndian.PutUint64(r[512:520], _z.C0.B0.A1[1]) + binary.BigEndian.PutUint64(r[504:512], _z.C0.B0.A1[2]) + binary.BigEndian.PutUint64(r[496:504], _z.C0.B0.A1[3]) + binary.BigEndian.PutUint64(r[488:496], _z.C0.B0.A1[4]) + binary.BigEndian.PutUint64(r[480:488], _z.C0.B0.A1[5]) + + binary.BigEndian.PutUint64(r[472:480], _z.C0.B1.A0[0]) + binary.BigEndian.PutUint64(r[464:472], _z.C0.B1.A0[1]) + binary.BigEndian.PutUint64(r[456:464], _z.C0.B1.A0[2]) + binary.BigEndian.PutUint64(r[448:456], _z.C0.B1.A0[3]) + binary.BigEndian.PutUint64(r[440:448], _z.C0.B1.A0[4]) + binary.BigEndian.PutUint64(r[432:440], 
_z.C0.B1.A0[5]) + + binary.BigEndian.PutUint64(r[424:432], _z.C0.B1.A1[0]) + binary.BigEndian.PutUint64(r[416:424], _z.C0.B1.A1[1]) + binary.BigEndian.PutUint64(r[408:416], _z.C0.B1.A1[2]) + binary.BigEndian.PutUint64(r[400:408], _z.C0.B1.A1[3]) + binary.BigEndian.PutUint64(r[392:400], _z.C0.B1.A1[4]) + binary.BigEndian.PutUint64(r[384:392], _z.C0.B1.A1[5]) + + binary.BigEndian.PutUint64(r[376:384], _z.C0.B2.A0[0]) + binary.BigEndian.PutUint64(r[368:376], _z.C0.B2.A0[1]) + binary.BigEndian.PutUint64(r[360:368], _z.C0.B2.A0[2]) + binary.BigEndian.PutUint64(r[352:360], _z.C0.B2.A0[3]) + binary.BigEndian.PutUint64(r[344:352], _z.C0.B2.A0[4]) + binary.BigEndian.PutUint64(r[336:344], _z.C0.B2.A0[5]) + + binary.BigEndian.PutUint64(r[328:336], _z.C0.B2.A1[0]) + binary.BigEndian.PutUint64(r[320:328], _z.C0.B2.A1[1]) + binary.BigEndian.PutUint64(r[312:320], _z.C0.B2.A1[2]) + binary.BigEndian.PutUint64(r[304:312], _z.C0.B2.A1[3]) + binary.BigEndian.PutUint64(r[296:304], _z.C0.B2.A1[4]) + binary.BigEndian.PutUint64(r[288:296], _z.C0.B2.A1[5]) + + binary.BigEndian.PutUint64(r[280:288], _z.C1.B0.A0[0]) + binary.BigEndian.PutUint64(r[272:280], _z.C1.B0.A0[1]) + binary.BigEndian.PutUint64(r[264:272], _z.C1.B0.A0[2]) + binary.BigEndian.PutUint64(r[256:264], _z.C1.B0.A0[3]) + binary.BigEndian.PutUint64(r[248:256], _z.C1.B0.A0[4]) + binary.BigEndian.PutUint64(r[240:248], _z.C1.B0.A0[5]) + + binary.BigEndian.PutUint64(r[232:240], _z.C1.B0.A1[0]) + binary.BigEndian.PutUint64(r[224:232], _z.C1.B0.A1[1]) + binary.BigEndian.PutUint64(r[216:224], _z.C1.B0.A1[2]) + binary.BigEndian.PutUint64(r[208:216], _z.C1.B0.A1[3]) + binary.BigEndian.PutUint64(r[200:208], _z.C1.B0.A1[4]) + binary.BigEndian.PutUint64(r[192:200], _z.C1.B0.A1[5]) + + binary.BigEndian.PutUint64(r[184:192], _z.C1.B1.A0[0]) + binary.BigEndian.PutUint64(r[176:184], _z.C1.B1.A0[1]) + binary.BigEndian.PutUint64(r[168:176], _z.C1.B1.A0[2]) + binary.BigEndian.PutUint64(r[160:168], _z.C1.B1.A0[3]) + 
binary.BigEndian.PutUint64(r[152:160], _z.C1.B1.A0[4]) + binary.BigEndian.PutUint64(r[144:152], _z.C1.B1.A0[5]) + + binary.BigEndian.PutUint64(r[136:144], _z.C1.B1.A1[0]) + binary.BigEndian.PutUint64(r[128:136], _z.C1.B1.A1[1]) + binary.BigEndian.PutUint64(r[120:128], _z.C1.B1.A1[2]) + binary.BigEndian.PutUint64(r[112:120], _z.C1.B1.A1[3]) + binary.BigEndian.PutUint64(r[104:112], _z.C1.B1.A1[4]) + binary.BigEndian.PutUint64(r[96:104], _z.C1.B1.A1[5]) + + binary.BigEndian.PutUint64(r[88:96], _z.C1.B2.A0[0]) + binary.BigEndian.PutUint64(r[80:88], _z.C1.B2.A0[1]) + binary.BigEndian.PutUint64(r[72:80], _z.C1.B2.A0[2]) + binary.BigEndian.PutUint64(r[64:72], _z.C1.B2.A0[3]) + binary.BigEndian.PutUint64(r[56:64], _z.C1.B2.A0[4]) + binary.BigEndian.PutUint64(r[48:56], _z.C1.B2.A0[5]) + + binary.BigEndian.PutUint64(r[40:48], _z.C1.B2.A1[0]) + binary.BigEndian.PutUint64(r[32:40], _z.C1.B2.A1[1]) + binary.BigEndian.PutUint64(r[24:32], _z.C1.B2.A1[2]) + binary.BigEndian.PutUint64(r[16:24], _z.C1.B2.A1[3]) + binary.BigEndian.PutUint64(r[8:16], _z.C1.B2.A1[4]) + binary.BigEndian.PutUint64(r[0:8], _z.C1.B2.A1[5]) + + return +} + +// SetBytes interprets e as the bytes of a big-endian GT +// sets z to that value (in Montgomery form), and returns z. +// size(e) == 48 * 12 +// z.C1.B2.A1 | z.C1.B2.A0 | z.C1.B1.A1 | ... 
+func (z *E12) SetBytes(e []byte) error { + if len(e) != SizeOfGT { + return errors.New("invalid buffer size") + } + z.C0.B0.A0.SetBytes(e[528 : 528+fp.Bytes]) + + z.C0.B0.A1.SetBytes(e[480 : 480+fp.Bytes]) + + z.C0.B1.A0.SetBytes(e[432 : 432+fp.Bytes]) + + z.C0.B1.A1.SetBytes(e[384 : 384+fp.Bytes]) + + z.C0.B2.A0.SetBytes(e[336 : 336+fp.Bytes]) + + z.C0.B2.A1.SetBytes(e[288 : 288+fp.Bytes]) + + z.C1.B0.A0.SetBytes(e[240 : 240+fp.Bytes]) + + z.C1.B0.A1.SetBytes(e[192 : 192+fp.Bytes]) + + z.C1.B1.A0.SetBytes(e[144 : 144+fp.Bytes]) + + z.C1.B1.A1.SetBytes(e[96 : 96+fp.Bytes]) + + z.C1.B2.A0.SetBytes(e[48 : 48+fp.Bytes]) + + z.C1.B2.A1.SetBytes(e[0 : 0+fp.Bytes]) + + return nil +} + +// IsInSubGroup ensures GT/E12 is in correct sugroup +func (z *E12) IsInSubGroup() bool { + var a, b E12 + + // check z^(Phi_k(p)) == 1 + a.FrobeniusSquare(z) + b.FrobeniusSquare(&a).Mul(&b, z) + + if !a.Equal(&b) { + return false + } + + // check z^(p+1-t) == 1 + a.Frobenius(z) + b.Expt(z) + + return a.Equal(&b) +} diff --git a/ecc/bls12-378/internal/fptower/e12_pairing.go b/ecc/bls12-378/internal/fptower/e12_pairing.go new file mode 100644 index 000000000..6441d245f --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e12_pairing.go @@ -0,0 +1,128 @@ +package fptower + +func (z *E12) nSquare(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquare(z) + } +} + +func (z *E12) nSquareCompressed(n int) { + for i := 0; i < n; i++ { + z.CyclotomicSquareCompressed(z) + } +} + +// Expt set z to x^t in E12 and return z +func (z *E12) Expt(x *E12) *E12 { + + // Expt computation is derived from the addition chain: + // + // _1000 = 1 << 3 + // _1001 = 1 + _1000 + // _1001000 = _1001 << 3 + // _1010001 = _1001 + _1001000 + // _10011001 = _1001000 + _1010001 + // i67 = ((_10011001 << 5 + _1001) << 10 + _1010001) << 41 + // return 1 + i67 + // + // Operations: 62 squares 6 multiplies + // + // Generated by github.com/mmcloughlin/addchain v0.4.0. + + // Allocate Temporaries. 
+ var result, t0, t1 E12 + + // Step 3: result = x^0x8 + result.CyclotomicSquare(x) + result.nSquare(2) + + // Step 4: t0 = x^0x9 + t0.Mul(x, &result) + + // Step 7: t1 = x^0x48 + t1.CyclotomicSquare(&t0) + t1.nSquare(2) + + // Step 8: result = x^0x51 + result.Mul(&t0, &t1) + + // Step 9: t1 = x^0x99 + t1.Mul(&t1, &result) + + // Step 14: t1 = x^0x1320 + t1.nSquare(5) + + // Step 15: t0 = x^0x1329 + t0.Mul(&t0, &t1) + + // Step 25: t0 = x^0x4ca400 + t0.nSquare(10) + + // Step 26: result = x^0x4ca451 + result.Mul(&result, &t0) + + // Step 67: result = x^0x9948a20000000000 + result.nSquareCompressed(41) + result.Decompress(&result) + + // Step 68: result = x^0x9948a20000000001 + z.Mul(x, &result) + + return z +} + +// MulBy014 multiplication by sparse element (c0, c1, 0, 0, c4) +func (z *E12) MulBy014(c0, c1, c4 *E2) *E12 { + + var a, b E6 + var d E2 + + a.Set(&z.C0) + a.MulBy01(c0, c1) + + b.Set(&z.C1) + b.MulBy1(c4) + d.Add(c1, c4) + + z.C1.Add(&z.C1, &z.C0) + z.C1.MulBy01(c0, &d) + z.C1.Sub(&z.C1, &a) + z.C1.Sub(&z.C1, &b) + z.C0.MulByNonResidue(&b) + z.C0.Add(&z.C0, &a) + + return z +} + +// Mul014By014 multiplication of sparse element (c0,c1,0,0,c4,0) by sparse element (d0,d1,0,0,d4,0) +func (z *E12) Mul014By014(d0, d1, d4, c0, c1, c4 *E2) *E12 { + var tmp, x0, x1, x4, x04, x01, x14 E2 + x0.Mul(c0, d0) + x1.Mul(c1, d1) + x4.Mul(c4, d4) + tmp.Add(c0, c4) + x04.Add(d0, d4). + Mul(&x04, &tmp). + Sub(&x04, &x0). + Sub(&x04, &x4) + tmp.Add(c0, c1) + x01.Add(d0, d1). + Mul(&x01, &tmp). + Sub(&x01, &x0). + Sub(&x01, &x1) + tmp.Add(c1, c4) + x14.Add(d1, d4). + Mul(&x14, &tmp). + Sub(&x14, &x1). + Sub(&x14, &x4) + + z.C0.B0.MulByNonResidue(&x4). 
+ Add(&z.C0.B0, &x0) + z.C0.B1.Set(&x01) + z.C0.B2.Set(&x1) + z.C1.B0.SetZero() + z.C1.B1.Set(&x04) + z.C1.B2.Set(&x14) + + return z +} diff --git a/ecc/bls12-378/internal/fptower/e12_test.go b/ecc/bls12-378/internal/fptower/e12_test.go new file mode 100644 index 000000000..939f945bf --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e12_test.go @@ -0,0 +1,492 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE12Serialization(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE12() + + properties.Property("[BLS12-378] SetBytes(Bytes()) should stay constant", prop.ForAll( + func(a *E12) bool { + var b E12 + buf := a.Bytes() + if err := b.SetBytes(buf[:]); err != nil { + return false + } + return a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE12ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := 
GenE12() + genB := GenE12() + + properties.Property("[BLS12-378] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E12) bool { + var c, d E12 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E12) bool { + var c, d E12 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E12) bool { + var c, d E12 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Cyclotomic square) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.CyclotomicSquare(a) + a.CyclotomicSquare(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + 
b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Frobenius) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.Frobenius(a) + a.Frobenius(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (FrobeniusSquare) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.FrobeniusSquare(a) + a.FrobeniusSquare(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (FrobeniusCube) should output the same result", prop.ForAll( + func(a *E12) bool { + var b E12 + b.FrobeniusCube(a) + a.FrobeniusCube(a) + return a.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE12Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE12() + genB := GenE12() + + properties.Property("[BLS12-378] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E12) bool { + var c E12 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E12) bool { + var c, d E12 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] inverse twice should leave an element invariant", prop.ForAll( + func(a *E12) bool { + var b E12 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] square and mul should output the same result", prop.ForAll( + func(a *E12) bool { + var b, c E12 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] a + pi(a), a-pi(a) should be 
real", prop.ForAll( + func(a *E12) bool { + var b, c, d E12 + var e, f, g E6 + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.C0) + f.Double(&a.C1) + return c.C1.Equal(&g) && d.C0.Equal(&g) && e.Equal(&c.C0) && f.Equal(&d.C1) + }, + genA, + )) + + properties.Property("[BLS12-378] pi**12=id", prop.ForAll( + func(a *E12) bool { + var b E12 + b.Frobenius(a). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b). + Frobenius(&b) + return b.Equal(a) + }, + genA, + )) + + properties.Property("[BLS12-378] (pi**2)**6=id", prop.ForAll( + func(a *E12) bool { + var b E12 + b.FrobeniusSquare(a). + FrobeniusSquare(&b). + FrobeniusSquare(&b). + FrobeniusSquare(&b). + FrobeniusSquare(&b). + FrobeniusSquare(&b) + return b.Equal(a) + }, + genA, + )) + + properties.Property("[BLS12-378] (pi**3)**4=id", prop.ForAll( + func(a *E12) bool { + var b E12 + b.FrobeniusCube(a). + FrobeniusCube(&b). + FrobeniusCube(&b). 
+ FrobeniusCube(&b) + return b.Equal(a) + }, + genA, + )) + + properties.Property("[BLS12-378] cyclotomic square (Granger-Scott) and square should be the same in the cyclotomic subgroup", prop.ForAll( + func(a *E12) bool { + var b, c, d E12 + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.FrobeniusSquare(&b).Mul(a, &b) + c.Square(a) + d.CyclotomicSquare(a) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BLS12-378] compressed cyclotomic square (Karabina) and square should be the same in the cyclotomic subgroup", prop.ForAll( + func(a *E12) bool { + var b, c, d E12 + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.FrobeniusSquare(&b).Mul(a, &b) + c.Square(a) + d.CyclotomicSquareCompressed(a).Decompress(&d) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BLS12-378] batch decompress and individual decompress (Karabina) should be the same", prop.ForAll( + func(a *E12) bool { + var b E12 + // put in the cyclotomic subgroup + b.Conjugate(a) + a.Inverse(a) + b.Mul(&b, a) + a.FrobeniusSquare(&b).Mul(a, &b) + + var a2, a4, a17 E12 + a2.Set(a) + a4.Set(a) + a17.Set(a) + a2.nSquareCompressed(2) + a4.nSquareCompressed(4) + a17.nSquareCompressed(17) + batch := BatchDecompress([]E12{a2, a4, a17}) + a2.Decompress(&a2) + a4.Decompress(&a4) + a17.Decompress(&a17) + + return a2.Equal(&batch[0]) && a4.Equal(&batch[1]) && a17.Equal(&batch[2]) + }, + genA, + )) + + properties.Property("[BLS12-378] Frobenius of x in E12 should be equal to x^q", prop.ForAll( + func(a *E12) bool { + var b, c E12 + q := fp.Modulus() + b.Frobenius(a) + c.Exp(a, *q) + return c.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] FrobeniusSquare of x in E12 should be equal to x^(q^2)", prop.ForAll( + func(a *E12) bool { + var b, c E12 + q := fp.Modulus() + b.FrobeniusSquare(a) + c.Exp(a, *q).Exp(&c, *q) + return c.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] FrobeniusCube of x in E12 should be equal to x^(q^3)", prop.ForAll( + func(a *E12) 
bool { + var b, c E12 + q := fp.Modulus() + b.FrobeniusCube(a) + c.Exp(a, *q).Exp(&c, *q).Exp(&c, *q) + return c.Equal(&b) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE12Add(b *testing.B) { + var a, c E12 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE12Sub(b *testing.B) { + var a, c E12 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE12Mul(b *testing.B) { + var a, c E12 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE12Cyclosquare(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.CyclotomicSquare(&a) + } +} + +func BenchmarkE12Square(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE12Inverse(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } +} + +func BenchmarkE12Conjugate(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Conjugate(&a) + } +} + +func BenchmarkE12Frobenius(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Frobenius(&a) + } +} + +func BenchmarkE12FrobeniusSquare(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.FrobeniusSquare(&a) + } +} + +func BenchmarkE12FrobeniusCube(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.FrobeniusCube(&a) + } +} + +func BenchmarkE12Expt(b *testing.B) { + var a E12 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Expt(&a) + } +} diff --git a/ecc/bls12-378/internal/fptower/e2.go 
b/ecc/bls12-378/internal/fptower/e2.go new file mode 100644 index 000000000..ff630a714 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e2.go @@ -0,0 +1,262 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "math/big" +) + +// E2 is a degree two finite field extension of fp.Element +type E2 struct { + A0, A1 fp.Element +} + +// Equal returns true if z equals x, fasle otherwise +func (z *E2) Equal(x *E2) bool { + return z.A0.Equal(&x.A0) && z.A1.Equal(&x.A1) +} + +// Cmp compares (lexicographic order) z and x and returns: +// +// -1 if z < x +// 0 if z == x +// +1 if z > x +// +func (z *E2) Cmp(x *E2) int { + if a1 := z.A1.Cmp(&x.A1); a1 != 0 { + return a1 + } + return z.A0.Cmp(&x.A0) +} + +// LexicographicallyLargest returns true if this element is strictly lexicographically +// larger than its negation, false otherwise +func (z *E2) LexicographicallyLargest() bool { + // adapted from github.com/zkcrypto/bls12_381 + if z.A1.IsZero() { + return z.A0.LexicographicallyLargest() + } + return z.A1.LexicographicallyLargest() +} + +// SetString sets a E2 element from strings +func (z *E2) SetString(s1, s2 string) *E2 { + z.A0.SetString(s1) + z.A1.SetString(s2) + return z +} + +// SetZero sets an E2 elmt to zero +func (z *E2) SetZero() *E2 { + z.A0.SetZero() + z.A1.SetZero() 
+ return z +} + +// Set sets an E2 from x +func (z *E2) Set(x *E2) *E2 { + z.A0 = x.A0 + z.A1 = x.A1 + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E2) SetOne() *E2 { + z.A0.SetOne() + z.A1.SetZero() + return z +} + +// SetRandom sets a0 and a1 to random values +func (z *E2) SetRandom() (*E2, error) { + if _, err := z.A0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.A1.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// IsZero returns true if the two elements are equal, fasle otherwise +func (z *E2) IsZero() bool { + return z.A0.IsZero() && z.A1.IsZero() +} + +// Add adds two elements of E2 +func (z *E2) Add(x, y *E2) *E2 { + addE2(z, x, y) + return z +} + +// Sub two elements of E2 +func (z *E2) Sub(x, y *E2) *E2 { + subE2(z, x, y) + return z +} + +// Double doubles an E2 element +func (z *E2) Double(x *E2) *E2 { + doubleE2(z, x) + return z +} + +// Neg negates an E2 element +func (z *E2) Neg(x *E2) *E2 { + negE2(z, x) + return z +} + +// String implements Stringer interface for fancy printing +func (z *E2) String() string { + return (z.A0.String() + "+" + z.A1.String() + "*u") +} + +// ToMont converts to mont form +func (z *E2) ToMont() *E2 { + z.A0.ToMont() + z.A1.ToMont() + return z +} + +// FromMont converts from mont form +func (z *E2) FromMont() *E2 { + z.A0.FromMont() + z.A1.FromMont() + return z +} + +// MulByElement multiplies an element in E2 by an element in fp +func (z *E2) MulByElement(x *E2, y *fp.Element) *E2 { + var yCopy fp.Element + yCopy.Set(y) + z.A0.Mul(&x.A0, &yCopy) + z.A1.Mul(&x.A1, &yCopy) + return z +} + +// Conjugate conjugates an element in E2 +func (z *E2) Conjugate(x *E2) *E2 { + z.A0 = x.A0 + z.A1.Neg(&x.A1) + return z +} + +// Halve sets z = z / 2 +func (z *E2) Halve() { + z.A0.Halve() + z.A1.Halve() +} + +// Legendre returns the Legendre symbol of z +func (z *E2) Legendre() int { + var n fp.Element + z.norm(&n) + return n.Legendre() +} + +// Exp sets z=x**e 
and returns it +func (z *E2) Exp(x E2, exponent *big.Int) *E2 { + z.SetOne() + b := exponent.Bytes() + for i := 0; i < len(b); i++ { + w := b[i] + for j := 0; j < 8; j++ { + z.Square(z) + if (w & (0b10000000 >> j)) != 0 { + z.Mul(z, &x) + } + } + } + + return z +} + +// Sqrt sets z to the square root of and returns z +// The function does not test wether the square root +// exists or not, it's up to the caller to call +// Legendre beforehand. +// cf https://eprint.iacr.org/2012/685.pdf (algo 10) +func (z *E2) Sqrt(x *E2) *E2 { + + // precomputation + var b, c, d, e, f, x0 E2 + var _b, o fp.Element + + // c must be a non square (works for p=1 mod 12 hence 1 mod 4, only bls377 has such a p currently) + c.A1.SetOne() + + q := fp.Modulus() + var exp, one big.Int + one.SetUint64(1) + exp.Set(q).Sub(&exp, &one).Rsh(&exp, 1) + d.Exp(c, &exp) + e.Mul(&d, &c).Inverse(&e) + f.Mul(&d, &c).Square(&f) + + // computation + exp.Rsh(&exp, 1) + b.Exp(*x, &exp) + b.norm(&_b) + o.SetOne() + if _b.Equal(&o) { + x0.Square(&b).Mul(&x0, x) + _b.Set(&x0.A0).Sqrt(&_b) + z.Conjugate(&b).MulByElement(z, &_b) + return z + } + x0.Square(&b).Mul(&x0, x).Mul(&x0, &f) + _b.Set(&x0.A0).Sqrt(&_b) + z.Conjugate(&b).MulByElement(z, &_b).Mul(z, &e) + + return z +} + +// BatchInvert returns a new slice with every element inverted. 
+// Uses Montgomery batch inversion trick +func BatchInvert(a []E2) []E2 { + res := make([]E2, len(a)) + if len(a) == 0 { + return res + } + + zeroes := make([]bool, len(a)) + var accumulator E2 + accumulator.SetOne() + + for i := 0; i < len(a); i++ { + if a[i].IsZero() { + zeroes[i] = true + continue + } + res[i].Set(&accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + accumulator.Inverse(&accumulator) + + for i := len(a) - 1; i >= 0; i-- { + if zeroes[i] { + continue + } + res[i].Mul(&res[i], &accumulator) + accumulator.Mul(&accumulator, &a[i]) + } + + return res +} diff --git a/ecc/bls12-378/internal/fptower/e2_amd64.go b/ecc/bls12-378/internal/fptower/e2_amd64.go new file mode 100644 index 000000000..1e55f1994 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e2_amd64.go @@ -0,0 +1,45 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +// q (modulus) +var qE2 = [6]uint64{ + 11045256207009841153, + 14886639130118979584, + 10956628289047010687, + 9513184293603517222, + 6038022134869067682, + 283357621510263184, +} + +// q'[0], see montgommery multiplication algorithm +var ( + qE2Inv0 uint64 = 11045256207009841151 + _ = qE2Inv0 // used in asm +) + +//go:noescape +func addE2(res, x, y *E2) + +//go:noescape +func subE2(res, x, y *E2) + +//go:noescape +func doubleE2(res, x *E2) + +//go:noescape +func negE2(res, x *E2) diff --git a/ecc/bls12-378/internal/fptower/e2_amd64.s b/ecc/bls12-378/internal/fptower/e2_amd64.s new file mode 100644 index 000000000..db266c308 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e2_amd64.s @@ -0,0 +1,320 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "textflag.h" +#include "funcdata.h" + +// modulus q +DATA q<>+0(SB)/8, $0x9948a20000000001 +DATA q<>+8(SB)/8, $0xce97f76a822c0000 +DATA q<>+16(SB)/8, $0x980dc360d0a49d7f +DATA q<>+24(SB)/8, $0x84059eb647102326 +DATA q<>+32(SB)/8, $0x53cb5d240ed107a2 +DATA q<>+40(SB)/8, $0x03eeb0416684d190 +GLOBL q<>(SB), (RODATA+NOPTR), $48 + +// qInv0 q'[0] +DATA qInv0<>(SB)/8, $0x9948a1ffffffffff +GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 + +#define REDUCE(ra0, ra1, ra2, ra3, ra4, ra5, rb0, rb1, rb2, rb3, rb4, rb5) \ + MOVQ ra0, rb0; \ + SUBQ q<>(SB), ra0; \ + MOVQ ra1, rb1; \ + SBBQ q<>+8(SB), ra1; \ + MOVQ ra2, rb2; \ + SBBQ q<>+16(SB), ra2; \ + MOVQ ra3, rb3; \ + SBBQ q<>+24(SB), ra3; \ + MOVQ ra4, rb4; \ + SBBQ q<>+32(SB), ra4; \ + MOVQ ra5, rb5; \ + SBBQ q<>+40(SB), ra5; \ + CMOVQCS rb0, ra0; \ + CMOVQCS rb1, ra1; \ + CMOVQCS rb2, ra2; \ + CMOVQCS rb3, ra3; \ + CMOVQCS rb4, ra4; \ + CMOVQCS rb5, ra5; \ + +TEXT ·addE2(SB), NOSPLIT, $0-24 + MOVQ x+8(FP), AX + MOVQ 0(AX), BX + MOVQ 8(AX), SI + MOVQ 16(AX), DI + MOVQ 24(AX), R8 + MOVQ 32(AX), R9 + MOVQ 40(AX), R10 + MOVQ y+16(FP), DX + ADDQ 0(DX), BX + ADCQ 8(DX), SI + ADCQ 16(DX), DI + ADCQ 24(DX), R8 + ADCQ 32(DX), R9 + ADCQ 40(DX), R10 + + // reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,s0-8(SP)) + REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP)) + + MOVQ res+0(FP), CX + MOVQ BX, 0(CX) + MOVQ SI, 8(CX) + MOVQ DI, 16(CX) + MOVQ R8, 24(CX) + MOVQ R9, 32(CX) + MOVQ R10, 40(CX) + MOVQ 48(AX), BX + MOVQ 56(AX), SI + MOVQ 64(AX), DI + MOVQ 72(AX), R8 + MOVQ 80(AX), R9 + MOVQ 88(AX), R10 + ADDQ 48(DX), BX + ADCQ 56(DX), SI + ADCQ 64(DX), DI + ADCQ 72(DX), R8 + ADCQ 80(DX), R9 + ADCQ 88(DX), R10 + + // reduce element(BX,SI,DI,R8,R9,R10) using temp registers (R11,R12,R13,R14,R15,s0-8(SP)) + REDUCE(BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15,s0-8(SP)) + + MOVQ BX, 48(CX) + MOVQ SI, 56(CX) + MOVQ DI, 64(CX) + MOVQ R8, 72(CX) + MOVQ R9, 80(CX) + MOVQ R10, 88(CX) + RET + +TEXT ·doubleE2(SB), 
NOSPLIT, $0-16 + MOVQ res+0(FP), DX + MOVQ x+8(FP), AX + MOVQ 0(AX), CX + MOVQ 8(AX), BX + MOVQ 16(AX), SI + MOVQ 24(AX), DI + MOVQ 32(AX), R8 + MOVQ 40(AX), R9 + ADDQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ CX, 0(DX) + MOVQ BX, 8(DX) + MOVQ SI, 16(DX) + MOVQ DI, 24(DX) + MOVQ R8, 32(DX) + MOVQ R9, 40(DX) + MOVQ 48(AX), CX + MOVQ 56(AX), BX + MOVQ 64(AX), SI + MOVQ 72(AX), DI + MOVQ 80(AX), R8 + MOVQ 88(AX), R9 + ADDQ CX, CX + ADCQ BX, BX + ADCQ SI, SI + ADCQ DI, DI + ADCQ R8, R8 + ADCQ R9, R9 + + // reduce element(CX,BX,SI,DI,R8,R9) using temp registers (R10,R11,R12,R13,R14,R15) + REDUCE(CX,BX,SI,DI,R8,R9,R10,R11,R12,R13,R14,R15) + + MOVQ CX, 48(DX) + MOVQ BX, 56(DX) + MOVQ SI, 64(DX) + MOVQ DI, 72(DX) + MOVQ R8, 80(DX) + MOVQ R9, 88(DX) + RET + +TEXT ·subE2(SB), NOSPLIT, $0-24 + XORQ R9, R9 + MOVQ x+8(FP), R8 + MOVQ 0(R8), AX + MOVQ 8(R8), DX + MOVQ 16(R8), CX + MOVQ 24(R8), BX + MOVQ 32(R8), SI + MOVQ 40(R8), DI + MOVQ y+16(FP), R8 + SUBQ 0(R8), AX + SBBQ 8(R8), DX + SBBQ 16(R8), CX + SBBQ 24(R8), BX + SBBQ 32(R8), SI + SBBQ 40(R8), DI + MOVQ x+8(FP), R8 + MOVQ $0x9948a20000000001, R10 + MOVQ $0xce97f76a822c0000, R11 + MOVQ $0x980dc360d0a49d7f, R12 + MOVQ $0x84059eb647102326, R13 + MOVQ $0x53cb5d240ed107a2, R14 + MOVQ $0x03eeb0416684d190, R15 + CMOVQCC R9, R10 + CMOVQCC R9, R11 + CMOVQCC R9, R12 + CMOVQCC R9, R13 + CMOVQCC R9, R14 + CMOVQCC R9, R15 + ADDQ R10, AX + ADCQ R11, DX + ADCQ R12, CX + ADCQ R13, BX + ADCQ R14, SI + ADCQ R15, DI + MOVQ res+0(FP), R10 + MOVQ AX, 0(R10) + MOVQ DX, 8(R10) + MOVQ CX, 16(R10) + MOVQ BX, 24(R10) + MOVQ SI, 32(R10) + MOVQ DI, 40(R10) + MOVQ 48(R8), AX + MOVQ 56(R8), DX + MOVQ 64(R8), CX + MOVQ 72(R8), BX + MOVQ 80(R8), SI + MOVQ 88(R8), DI + MOVQ y+16(FP), R8 + SUBQ 48(R8), AX + SBBQ 56(R8), DX + SBBQ 64(R8), CX + SBBQ 72(R8), BX + SBBQ 80(R8), 
SI + SBBQ 88(R8), DI + MOVQ $0x9948a20000000001, R11 + MOVQ $0xce97f76a822c0000, R12 + MOVQ $0x980dc360d0a49d7f, R13 + MOVQ $0x84059eb647102326, R14 + MOVQ $0x53cb5d240ed107a2, R15 + MOVQ $0x03eeb0416684d190, R10 + CMOVQCC R9, R11 + CMOVQCC R9, R12 + CMOVQCC R9, R13 + CMOVQCC R9, R14 + CMOVQCC R9, R15 + CMOVQCC R9, R10 + ADDQ R11, AX + ADCQ R12, DX + ADCQ R13, CX + ADCQ R14, BX + ADCQ R15, SI + ADCQ R10, DI + MOVQ res+0(FP), R8 + MOVQ AX, 48(R8) + MOVQ DX, 56(R8) + MOVQ CX, 64(R8) + MOVQ BX, 72(R8) + MOVQ SI, 80(R8) + MOVQ DI, 88(R8) + RET + +TEXT ·negE2(SB), NOSPLIT, $0-16 + MOVQ res+0(FP), DX + MOVQ x+8(FP), AX + MOVQ 0(AX), BX + MOVQ 8(AX), SI + MOVQ 16(AX), DI + MOVQ 24(AX), R8 + MOVQ 32(AX), R9 + MOVQ 40(AX), R10 + MOVQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + ORQ R9, AX + ORQ R10, AX + TESTQ AX, AX + JNE l1 + MOVQ AX, 0(DX) + MOVQ AX, 8(DX) + MOVQ AX, 16(DX) + MOVQ AX, 24(DX) + MOVQ AX, 32(DX) + MOVQ AX, 40(DX) + JMP l3 + +l1: + MOVQ $0x9948a20000000001, CX + SUBQ BX, CX + MOVQ CX, 0(DX) + MOVQ $0xce97f76a822c0000, CX + SBBQ SI, CX + MOVQ CX, 8(DX) + MOVQ $0x980dc360d0a49d7f, CX + SBBQ DI, CX + MOVQ CX, 16(DX) + MOVQ $0x84059eb647102326, CX + SBBQ R8, CX + MOVQ CX, 24(DX) + MOVQ $0x53cb5d240ed107a2, CX + SBBQ R9, CX + MOVQ CX, 32(DX) + MOVQ $0x03eeb0416684d190, CX + SBBQ R10, CX + MOVQ CX, 40(DX) + +l3: + MOVQ x+8(FP), AX + MOVQ 48(AX), BX + MOVQ 56(AX), SI + MOVQ 64(AX), DI + MOVQ 72(AX), R8 + MOVQ 80(AX), R9 + MOVQ 88(AX), R10 + MOVQ BX, AX + ORQ SI, AX + ORQ DI, AX + ORQ R8, AX + ORQ R9, AX + ORQ R10, AX + TESTQ AX, AX + JNE l2 + MOVQ AX, 48(DX) + MOVQ AX, 56(DX) + MOVQ AX, 64(DX) + MOVQ AX, 72(DX) + MOVQ AX, 80(DX) + MOVQ AX, 88(DX) + RET + +l2: + MOVQ $0x9948a20000000001, CX + SUBQ BX, CX + MOVQ CX, 48(DX) + MOVQ $0xce97f76a822c0000, CX + SBBQ SI, CX + MOVQ CX, 56(DX) + MOVQ $0x980dc360d0a49d7f, CX + SBBQ DI, CX + MOVQ CX, 64(DX) + MOVQ $0x84059eb647102326, CX + SBBQ R8, CX + MOVQ CX, 72(DX) + MOVQ $0x53cb5d240ed107a2, CX + SBBQ R9, CX + MOVQ CX, 
80(DX) + MOVQ $0x03eeb0416684d190, CX + SBBQ R10, CX + MOVQ CX, 88(DX) + RET diff --git a/ecc/bls12-378/internal/fptower/e2_bls378.go b/ecc/bls12-378/internal/fptower/e2_bls378.go new file mode 100644 index 000000000..677180aef --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e2_bls378.go @@ -0,0 +1,104 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fptower + +import "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + +// Mul sets z to the E2-product of x,y, returns z +func (z *E2) Mul(x, y *E2) *E2 { + var a, b, c fp.Element + a.Add(&x.A0, &x.A1) + b.Add(&y.A0, &y.A1) + a.Mul(&a, &b) + b.Mul(&x.A0, &y.A0) + c.Mul(&x.A1, &y.A1) + z.A1.Sub(&a, &b).Sub(&z.A1, &c) + fp.MulBy5(&c) + z.A0.Sub(&b, &c) + return z +} + +// Square sets z to the E2-product of x,x returns z +func (z *E2) Square(x *E2) *E2 { + //algo 22 https://eprint.iacr.org/2010/354.pdf + var c0, c2 fp.Element + c0.Add(&x.A0, &x.A1) + c2.Neg(&x.A1) + fp.MulBy5(&c2) + c2.Add(&c2, &x.A0) + + c0.Mul(&c0, &c2) // (x1+x2)*(x1+(u**2)x2) + c2.Mul(&x.A0, &x.A1).Double(&c2) + z.A1 = c2 + c2.Double(&c2) + z.A0.Add(&c0, &c2) + + return z +} + +// MulByNonResidue multiplies a E2 by (0,1) +func (z *E2) MulByNonResidue(x *E2) *E2 { + a := x.A0 + b := x.A1 // fetching x.A1 in the function below is slower + fp.MulBy5(&b) + z.A0.Neg(&b) + z.A1 = a + return z +} + +// MulByNonResidueInv multiplies a E2 by (0,1)^{-1} +func (z *E2) MulByNonResidueInv(x *E2) *E2 { + 
//z.A1.MulByNonResidueInv(&x.A0) + a := x.A1 + fiveinv := fp.Element{ + 4714375566610504077, + 585136512338283717, + 16899133777167898908, + 1882388787078723660, + 12465292654455594957, + 119042783712594200, + } + z.A1.Mul(&x.A0, &fiveinv).Neg(&z.A1) + z.A0 = a + return z +} + +// Inverse sets z to the E2-inverse of x, returns z +func (z *E2) Inverse(x *E2) *E2 { + // Algorithm 8 from https://eprint.iacr.org/2010/354.pdf + //var a, b, t0, t1, tmp fp.Element + var t0, t1, tmp fp.Element + a := &x.A0 // creating the buffers a, b is faster than querying &x.A0, &x.A1 in the functions call below + b := &x.A1 + t0.Square(a) + t1.Square(b) + tmp.Set(&t1) + fp.MulBy5(&tmp) + t0.Add(&t0, &tmp) + t1.Inverse(&t0) + z.A0.Mul(a, &t1) + z.A1.Mul(b, &t1).Neg(&z.A1) + + return z +} + +// norm sets x to the norm of z +func (z *E2) norm(x *fp.Element) { + var tmp fp.Element + x.Square(&z.A1) + tmp.Set(x) + fp.MulBy5(&tmp) + x.Square(&z.A0).Add(x, &tmp) +} diff --git a/ecc/bls12-378/internal/fptower/e2_fallback.go b/ecc/bls12-378/internal/fptower/e2_fallback.go new file mode 100644 index 000000000..0ce4d8333 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e2_fallback.go @@ -0,0 +1,40 @@ +//go:build !amd64 +// +build !amd64 + +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +func addE2(z, x, y *E2) { + z.A0.Add(&x.A0, &y.A0) + z.A1.Add(&x.A1, &y.A1) +} + +func subE2(z, x, y *E2) { + z.A0.Sub(&x.A0, &y.A0) + z.A1.Sub(&x.A1, &y.A1) +} + +func doubleE2(z, x *E2) { + z.A0.Double(&x.A0) + z.A1.Double(&x.A1) +} + +func negE2(z, x *E2) { + z.A0.Neg(&x.A0) + z.A1.Neg(&x.A1) +} diff --git a/ecc/bls12-378/internal/fptower/e2_test.go b/ecc/bls12-378/internal/fptower/e2_test.go new file mode 100644 index 000000000..0c3f7e257 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e2_test.go @@ -0,0 +1,506 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "crypto/rand" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE2ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE2() + genB := GenE2() + genfp := GenFp() + + properties.Property("[BLS12-378] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (neg) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Neg(a) + a.Neg(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a 
*E2) bool { + var b E2 + b.Double(a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by non residue) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.MulByNonResidue(a) + a.MulByNonResidue(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by non residue inverse) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.MulByNonResidueInv(a) + a.MulByNonResidueInv(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Conjugate) should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Conjugate(a) + a.Conjugate(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by element) should output the same result", prop.ForAll( + func(a *E2, b fp.Element) bool { + var c E2 + c.MulByElement(a, &b) + a.MulByElement(a, &b) + return a.Equal(&c) + }, + genA, + genfp, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Sqrt) should output the same result", prop.ForAll( + func(a *E2) bool { + var b, c, d, s E2 + + s.Square(a) + a.Set(&s) + b.Set(&s) + + a.Sqrt(a) + b.Sqrt(&b) + + c.Square(a) + d.Square(&b) + return c.Equal(&d) + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +func TestE2MulMaxed(t *testing.T) { + // let's pick a and b, with maxed A0 and A1 + var a, b E2 + fpMaxValue := fp.Element{ + 11045256207009841153, + 
14886639130118979584, + 10956628289047010687, + 9513184293603517222, + 6038022134869067682, + 283357621510263184, + } + fpMaxValue[0]-- + + a.A0 = fpMaxValue + a.A1 = fpMaxValue + b.A0 = fpMaxValue + b.A1 = fpMaxValue + + var c, d E2 + d.Inverse(&b) + c.Set(&a) + c.Mul(&c, &b).Mul(&c, &d) + if !c.Equal(&a) { + t.Fatal("mul with max fp failed") + } +} + +func TestE2Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE2() + genB := GenE2() + genfp := GenFp() + + properties.Property("[BLS12-378] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E2) bool { + var c E2 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E2) bool { + var c, d E2 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] BatchInvert should output the same result as Inverse", prop.ForAll( + func(a, b, c *E2) bool { + + batch := BatchInvert([]E2{*a, *b, *c}) + a.Inverse(a) + b.Inverse(b) + c.Inverse(c) + return a.Equal(&batch[0]) && b.Equal(&batch[1]) && c.Equal(&batch[2]) + }, + genA, + genA, + genA, + )) + + properties.Property("[BLS12-378] inverse twice should leave an element invariant", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] neg twice should leave an element invariant", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Neg(a).Neg(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] square and mul should output the same result", prop.ForAll( + func(a *E2) bool { + var b, c E2 + b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] MulByElement 
MulByElement inverse should leave an element invariant", prop.ForAll( + func(a *E2, b fp.Element) bool { + var c E2 + var d fp.Element + d.Inverse(&b) + c.MulByElement(a, &b).MulByElement(&c, &d) + return c.Equal(a) + }, + genA, + genfp, + )) + + properties.Property("[BLS12-378] Double and mul by 2 should output the same result", prop.ForAll( + func(a *E2) bool { + var b E2 + var c fp.Element + c.SetUint64(2) + b.Double(a) + a.MulByElement(a, &c) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Mulbynonres mulbynonresinv should leave the element invariant", prop.ForAll( + func(a *E2) bool { + var b E2 + b.MulByNonResidue(a).MulByNonResidueInv(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] a + pi(a), a-pi(a) should be real", prop.ForAll( + func(a *E2) bool { + var b, c, d E2 + var e, f fp.Element + b.Conjugate(a) + c.Add(a, &b) + d.Sub(a, &b) + e.Double(&a.A0) + f.Double(&a.A1) + return c.A1.IsZero() && d.A0.IsZero() && e.Equal(&c.A0) && f.Equal(&d.A1) + }, + genA, + )) + + properties.Property("[BLS12-378] Legendre on square should output 1", prop.ForAll( + func(a *E2) bool { + var b E2 + b.Square(a) + c := b.Legendre() + return c == 1 + }, + genA, + )) + + properties.Property("[BLS12-378] square(sqrt) should leave an element invariant", prop.ForAll( + func(a *E2) bool { + var b, c, d, e E2 + b.Square(a) + c.Sqrt(&b) + d.Square(&c) + e.Neg(a) + return (c.Equal(a) || c.Equal(&e)) && d.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] neg(E2) == neg(E2.A0, E2.A1)", prop.ForAll( + func(a *E2) bool { + var b, c E2 + b.Neg(a) + c.A0.Neg(&a.A0) + c.A1.Neg(&a.A1) + return c.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Cmp and LexicographicallyLargest should be consistant", prop.ForAll( + func(a *E2) bool { + var negA E2 + negA.Neg(a) + cmpResult := a.Cmp(&negA) + lResult := a.LexicographicallyLargest() + if lResult && cmpResult == 1 { + return true + } + if !lResult && cmpResult 
!= 1 { + return true + } + return false + }, + genA, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + if supportAdx { + t.Log("disabling ADX") + supportAdx = false + properties.TestingRun(t, gopter.ConsoleReporter(false)) + supportAdx = true + } +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE2Add(b *testing.B) { + var a, c E2 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE2Sub(b *testing.B) { + var a, c E2 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE2Mul(b *testing.B) { + var a, c E2 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE2MulByElement(b *testing.B) { + var a E2 + var c fp.Element + c.SetRandom() + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByElement(&a, &c) + } +} + +func BenchmarkE2Square(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + } +} + +func BenchmarkE2Sqrt(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sqrt(&a) + } +} + +func BenchmarkE2Exp(b *testing.B) { + var x E2 + x.SetRandom() + b1, _ := rand.Int(rand.Reader, fp.Modulus()) + b.ResetTimer() + for i := 0; i < b.N; i++ { + x.Exp(x, b1) + } +} + +func BenchmarkE2Inverse(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } +} + +func BenchmarkE2MulNonRes(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByNonResidue(&a) + } +} + +func BenchmarkE2MulNonResInv(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.MulByNonResidueInv(&a) + } +} + +func BenchmarkE2Conjugate(b *testing.B) { + var a E2 + a.SetRandom() + b.ResetTimer() + for i := 0; i < 
b.N; i++ { + a.Conjugate(&a) + } +} diff --git a/ecc/bls12-378/internal/fptower/e6.go b/ecc/bls12-378/internal/fptower/e6.go new file mode 100644 index 000000000..adc33ceef --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e6.go @@ -0,0 +1,264 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +// E6 is a degree three finite field extension of fp2 +type E6 struct { + B0, B1, B2 E2 +} + +// Equal returns true if z equals x, fasle otherwise +func (z *E6) Equal(x *E6) bool { + return z.B0.Equal(&x.B0) && z.B1.Equal(&x.B1) && z.B2.Equal(&x.B2) +} + +// SetString sets a E6 elmt from stringf +func (z *E6) SetString(s1, s2, s3, s4, s5, s6 string) *E6 { + z.B0.SetString(s1, s2) + z.B1.SetString(s3, s4) + z.B2.SetString(s5, s6) + return z +} + +// Set Sets a E6 elmt form another E6 elmt +func (z *E6) Set(x *E6) *E6 { + z.B0 = x.B0 + z.B1 = x.B1 + z.B2 = x.B2 + return z +} + +// SetOne sets z to 1 in Montgomery form and returns z +func (z *E6) SetOne() *E6 { + *z = E6{} + z.B0.A0.SetOne() + return z +} + +// SetRandom set z to a random elmt +func (z *E6) SetRandom() (*E6, error) { + if _, err := z.B0.SetRandom(); err != nil { + return nil, err + } + if _, err := z.B1.SetRandom(); err != nil { + return nil, err + } + if _, err := z.B2.SetRandom(); err != nil { + return nil, err + } + return z, nil +} + +// ToMont converts to Mont form +func (z 
*E6) ToMont() *E6 { + z.B0.ToMont() + z.B1.ToMont() + z.B2.ToMont() + return z +} + +// FromMont converts from Mont form +func (z *E6) FromMont() *E6 { + z.B0.FromMont() + z.B1.FromMont() + z.B2.FromMont() + return z +} + +// Add adds two elements of E6 +func (z *E6) Add(x, y *E6) *E6 { + z.B0.Add(&x.B0, &y.B0) + z.B1.Add(&x.B1, &y.B1) + z.B2.Add(&x.B2, &y.B2) + return z +} + +// Neg negates the E6 number +func (z *E6) Neg(x *E6) *E6 { + z.B0.Neg(&x.B0) + z.B1.Neg(&x.B1) + z.B2.Neg(&x.B2) + return z +} + +// Sub two elements of E6 +func (z *E6) Sub(x, y *E6) *E6 { + z.B0.Sub(&x.B0, &y.B0) + z.B1.Sub(&x.B1, &y.B1) + z.B2.Sub(&x.B2, &y.B2) + return z +} + +// Double doubles an element in E6 +func (z *E6) Double(x *E6) *E6 { + z.B0.Double(&x.B0) + z.B1.Double(&x.B1) + z.B2.Double(&x.B2) + return z +} + +// String puts E6 elmt in string form +func (z *E6) String() string { + return (z.B0.String() + "+(" + z.B1.String() + ")*v+(" + z.B2.String() + ")*v**2") +} + +// MulByNonResidue mul x by (0,1,0) +func (z *E6) MulByNonResidue(x *E6) *E6 { + z.B2, z.B1, z.B0 = x.B1, x.B0, x.B2 + z.B0.MulByNonResidue(&z.B0) + return z +} + +// MulByE2 multiplies an element in E6 by an element in E2 +func (z *E6) MulByE2(x *E6, y *E2) *E6 { + var yCopy E2 + yCopy.Set(y) + z.B0.Mul(&x.B0, &yCopy) + z.B1.Mul(&x.B1, &yCopy) + z.B2.Mul(&x.B2, &yCopy) + return z +} + +// MulBy01 multiplication by sparse element (c0,c1,0) +func (z *E6) MulBy01(c0, c1 *E2) *E6 { + + var a, b, tmp, t0, t1, t2 E2 + + a.Mul(&z.B0, c0) + b.Mul(&z.B1, c1) + + tmp.Add(&z.B1, &z.B2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + t0.Add(&t0, &a) + + tmp.Add(&z.B0, &z.B2) + t2.Mul(c0, &tmp) + t2.Sub(&t2, &a) + t2.Add(&t2, &b) + + t1.Add(c0, c1) + tmp.Add(&z.B0, &z.B1) + t1.Mul(&t1, &tmp) + t1.Sub(&t1, &a) + t1.Sub(&t1, &b) + + z.B0.Set(&t0) + z.B1.Set(&t1) + z.B2.Set(&t2) + + return z +} + +// MulBy1 multiplication of E6 by sparse element (0, c1, 0) +func (z *E6) MulBy1(c1 *E2) *E6 { + + var b, tmp, 
t0, t1 E2 + b.Mul(&z.B1, c1) + + tmp.Add(&z.B1, &z.B2) + t0.Mul(c1, &tmp) + t0.Sub(&t0, &b) + t0.MulByNonResidue(&t0) + + tmp.Add(&z.B0, &z.B1) + t1.Mul(c1, &tmp) + t1.Sub(&t1, &b) + + z.B0.Set(&t0) + z.B1.Set(&t1) + z.B2.Set(&b) + + return z +} + +// Mul sets z to the E6 product of x,y, returns z +func (z *E6) Mul(x, y *E6) *E6 { + // Algorithm 13 from https://eprint.iacr.org/2010/354.pdf + var t0, t1, t2, c0, c1, c2, tmp E2 + t0.Mul(&x.B0, &y.B0) + t1.Mul(&x.B1, &y.B1) + t2.Mul(&x.B2, &y.B2) + + c0.Add(&x.B1, &x.B2) + tmp.Add(&y.B1, &y.B2) + c0.Mul(&c0, &tmp).Sub(&c0, &t1).Sub(&c0, &t2).MulByNonResidue(&c0).Add(&c0, &t0) + + c1.Add(&x.B0, &x.B1) + tmp.Add(&y.B0, &y.B1) + c1.Mul(&c1, &tmp).Sub(&c1, &t0).Sub(&c1, &t1) + tmp.MulByNonResidue(&t2) + c1.Add(&c1, &tmp) + + tmp.Add(&x.B0, &x.B2) + c2.Add(&y.B0, &y.B2).Mul(&c2, &tmp).Sub(&c2, &t0).Sub(&c2, &t2).Add(&c2, &t1) + + z.B0.Set(&c0) + z.B1.Set(&c1) + z.B2.Set(&c2) + + return z +} + +// Square sets z to the E6 product of x,x, returns z +func (z *E6) Square(x *E6) *E6 { + + // Algorithm 16 from https://eprint.iacr.org/2010/354.pdf + var c4, c5, c1, c2, c3, c0 E2 + c4.Mul(&x.B0, &x.B1).Double(&c4) + c5.Square(&x.B2) + c1.MulByNonResidue(&c5).Add(&c1, &c4) + c2.Sub(&c4, &c5) + c3.Square(&x.B0) + c4.Sub(&x.B0, &x.B1).Add(&c4, &x.B2) + c5.Mul(&x.B1, &x.B2).Double(&c5) + c4.Square(&c4) + c0.MulByNonResidue(&c5).Add(&c0, &c3) + z.B2.Add(&c2, &c4).Add(&z.B2, &c5).Sub(&z.B2, &c3) + z.B0.Set(&c0) + z.B1.Set(&c1) + + return z +} + +// Inverse an element in E6 +func (z *E6) Inverse(x *E6) *E6 { + // Algorithm 17 from https://eprint.iacr.org/2010/354.pdf + // step 9 is wrong in the paper it's t1-t4 + var t0, t1, t2, t3, t4, t5, t6, c0, c1, c2, d1, d2 E2 + t0.Square(&x.B0) + t1.Square(&x.B1) + t2.Square(&x.B2) + t3.Mul(&x.B0, &x.B1) + t4.Mul(&x.B0, &x.B2) + t5.Mul(&x.B1, &x.B2) + c0.MulByNonResidue(&t5).Neg(&c0).Add(&c0, &t0) + c1.MulByNonResidue(&t2).Sub(&c1, &t3) + c2.Sub(&t1, &t4) + t6.Mul(&x.B0, &c0) + d1.Mul(&x.B2, &c1) + 
d2.Mul(&x.B1, &c2) + d1.Add(&d1, &d2).MulByNonResidue(&d1) + t6.Add(&t6, &d1) + t6.Inverse(&t6) + z.B0.Mul(&c0, &t6) + z.B1.Mul(&c1, &t6) + z.B2.Mul(&c2, &t6) + + return z +} diff --git a/ecc/bls12-378/internal/fptower/e6_test.go b/ecc/bls12-378/internal/fptower/e6_test.go new file mode 100644 index 000000000..b6d418d30 --- /dev/null +++ b/ecc/bls12-378/internal/fptower/e6_test.go @@ -0,0 +1,317 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package fptower + +import ( + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestE6ReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + genE2 := GenE2() + + properties.Property("[BLS12-378] Having the receiver as operand (addition) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Add(a, b) + a.Add(a, b) + b.Add(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (sub) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Sub(a, b) + a.Sub(a, b) + b.Sub(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul) should output the same result", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Set(a) + c.Mul(a, b) + a.Mul(a, b) + b.Mul(&d, b) + return a.Equal(b) && a.Equal(&c) && b.Equal(&c) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (square) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Square(a) + a.Square(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (neg) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Neg(a) + a.Neg(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (double) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Double(a) + a.Double(a) + return a.Equal(&b) + 
}, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by non residue) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.MulByNonResidue(a) + a.MulByNonResidue(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (Inverse) should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a) + a.Inverse(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Having the receiver as operand (mul by E2) should output the same result", prop.ForAll( + func(a *E6, b *E2) bool { + var c E6 + c.MulByE2(a, b) + a.MulByE2(a, b) + return a.Equal(&c) + }, + genA, + genE2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestE6Ops(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + genE2 := GenE2() + + properties.Property("[BLS12-378] sub & add should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c E6 + c.Set(a) + c.Add(&c, b).Sub(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] mul & inverse should leave an element invariant", prop.ForAll( + func(a, b *E6) bool { + var c, d E6 + d.Inverse(b) + c.Set(a) + c.Mul(&c, b).Mul(&c, &d) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.Property("[BLS12-378] inverse twice should leave an element invariant", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Inverse(a).Inverse(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] neg twice should leave an element invariant", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Neg(a).Neg(&b) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] square and mul should output the same result", prop.ForAll( + func(a *E6) bool { + var b, c E6 + 
b.Mul(a, a) + c.Square(a) + return b.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] Double and add twice should output the same result", prop.ForAll( + func(a *E6) bool { + var b E6 + b.Add(a, a) + a.Double(a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Mul by non residue should be the same as multiplying by (0,1,0)", prop.ForAll( + func(a *E6) bool { + var b, c E6 + b.B1.A0.SetOne() + c.Mul(a, &b) + a.MulByNonResidue(a) + return a.Equal(&c) + }, + genA, + )) + + properties.Property("[BLS12-378] MulByE2 MulByE2 inverse should leave an element invariant", prop.ForAll( + func(a *E6, b *E2) bool { + var c E6 + var d E2 + d.Inverse(b) + c.MulByE2(a, b).MulByE2(&c, &d) + return c.Equal(a) + }, + genA, + genE2, + )) + + properties.Property("[BLS12-378] Mul and MulBy01 should output the same result", prop.ForAll( + func(a *E6, c0, c1 *E2) bool { + var b E6 + b.B0.Set(c0) + b.B1.Set(c1) + b.Mul(&b, a) + a.MulBy01(c0, c1) + return b.Equal(a) + }, + genA, + genE2, + genE2, + )) + + properties.Property("[BLS12-378] Mul and MulBy1 should output the same result", prop.ForAll( + func(a *E6, c1 *E2) bool { + var b E6 + b.B1.Set(c1) + b.Mul(&b, a) + a.MulBy1(c1) + return b.Equal(a) + }, + genA, + genE2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkE6Add(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Add(&a, &c) + } +} + +func BenchmarkE6Sub(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Sub(&a, &c) + } +} + +func BenchmarkE6Mul(b *testing.B) { + var a, c E6 + a.SetRandom() + c.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Mul(&a, &c) + } +} + +func BenchmarkE6Square(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Square(&a) + 
} +} + +func BenchmarkE6Inverse(b *testing.B) { + var a E6 + a.SetRandom() + b.ResetTimer() + for i := 0; i < b.N; i++ { + a.Inverse(&a) + } +} diff --git a/ecc/bls12-378/internal/fptower/frobenius.go b/ecc/bls12-378/internal/fptower/frobenius.go new file mode 100644 index 000000000..4d477043a --- /dev/null +++ b/ecc/bls12-378/internal/fptower/frobenius.go @@ -0,0 +1,305 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fptower + +import "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + +// Frobenius set z to Frobenius(x), return z +func (z *E12) Frobenius(x *E12) *E12 { + // Algorithm 28 from https://eprint.iacr.org/2010/354.pdf (beware typos!) + var t [6]E2 + + // Frobenius acts on fp2 by conjugation + t[0].Conjugate(&x.C0.B0) + t[1].Conjugate(&x.C0.B1) + t[2].Conjugate(&x.C0.B2) + t[3].Conjugate(&x.C1.B0) + t[4].Conjugate(&x.C1.B1) + t[5].Conjugate(&x.C1.B2) + + t[1].MulByNonResidue1Power2(&t[1]) + t[2].MulByNonResidue1Power4(&t[2]) + t[3].MulByNonResidue1Power1(&t[3]) + t[4].MulByNonResidue1Power3(&t[4]) + t[5].MulByNonResidue1Power5(&t[5]) + + z.C0.B0 = t[0] + z.C0.B1 = t[1] + z.C0.B2 = t[2] + z.C1.B0 = t[3] + z.C1.B1 = t[4] + z.C1.B2 = t[5] + + return z +} + +// FrobeniusSquare set z to Frobenius^2(x), and return z +func (z *E12) FrobeniusSquare(x *E12) *E12 { + // Algorithm 29 from https://eprint.iacr.org/2010/354.pdf (beware typos!) 
+ var t [6]E2 + + t[1].MulByNonResidue2Power2(&x.C0.B1) + t[2].MulByNonResidue2Power4(&x.C0.B2) + t[3].MulByNonResidue2Power1(&x.C1.B0) + t[4].MulByNonResidue2Power3(&x.C1.B1) + t[5].MulByNonResidue2Power5(&x.C1.B2) + + z.C0.B0 = x.C0.B0 + z.C0.B1 = t[1] + z.C0.B2 = t[2] + z.C1.B0 = t[3] + z.C1.B1 = t[4] + z.C1.B2 = t[5] + + return z +} + +// FrobeniusCube set z to Frobenius^3(x), return z +func (z *E12) FrobeniusCube(x *E12) *E12 { + // Algorithm 30 from https://eprint.iacr.org/2010/354.pdf (beware typos!) + var t [6]E2 + + // Frobenius^3 acts on fp2 by conjugation + t[0].Conjugate(&x.C0.B0) + t[1].Conjugate(&x.C0.B1) + t[2].Conjugate(&x.C0.B2) + t[3].Conjugate(&x.C1.B0) + t[4].Conjugate(&x.C1.B1) + t[5].Conjugate(&x.C1.B2) + + t[1].MulByNonResidue3Power2(&t[1]) + t[3].MulByNonResidue3Power1(&t[3]) + t[4].MulByNonResidue3Power3(&t[4]) + t[5].MulByNonResidue3Power5(&t[5]) + + z.C0.B0 = t[0] + z.C0.B1 = t[1] + z.C0.B2 = t[2] + z.C1.B0 = t[3] + z.C1.B1 = t[4] + z.C1.B2 = t[5] + + return z +} + +// MulByNonResidue1Power1 set z=x*(0,1)^(1*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power1(x *E2) *E2 { + b := fp.Element{ + 9424304261440581301, + 15622662318784019360, + 5704744713545767383, + 7376930514650170538, + 2328236726423359970, + 256435709676028998, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power2 set z=x*(0,1)^(2*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power2(x *E2) *E2 { + b := fp.Element{ + 1263886799460835702, + 3481310115429540252, + 1430516082310201521, + 10760454131030452261, + 15881431079209118478, + 56234068425139279, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power3 set z=x*(0,1)^(3*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power3(x *E2) *E2 { + b := fp.Element{ + 6315024805150803022, + 16048962212196301574, + 10554832649293981783, + 14109148363171599309, + 4153042273623539198, + 250647462785784749, + } + z.A0.Mul(&x.A0, &b) + 
z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power4 set z=x*(0,1)^(4*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power4(x *E2) *E2 { + b := fp.Element{ + 18229265454137549239, + 11882161740266529218, + 12635080069402934820, + 1928134709134316785, + 2524500224088382290, + 27735392882694645, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue1Power5 set z=x*(0,1)^(5*(p^1-1)/6) and return z +func (z *E2) MulByNonResidue1Power5(x *E2) *E2 { + b := fp.Element{ + 7935976750720062874, + 15312939023531261798, + 15806716224795225087, + 16245402142124945993, + 7862827682069246910, + 277569374620018935, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power1 set z=x*(0,1)^(1*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power1(x *E2) *E2 { + b := fp.Element{ + 1263886799460835702, + 3481310115429540252, + 1430516082310201521, + 10760454131030452261, + 15881431079209118478, + 56234068425139279, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power2 set z=x*(0,1)^(2*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power2(x *E2) *E2 { + b := fp.Element{ + 18229265454137549239, + 11882161740266529218, + 12635080069402934820, + 1928134709134316785, + 2524500224088382290, + 27735392882694645, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power3 set z=x*(0,1)^(3*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power3(x *E2) *E2 { + b := fp.Element{ + 9563890787977003074, + 4840746681246416935, + 3714448202430192371, + 680864871707381747, + 11127835353457883110, + 254858945967818549, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power4 set z=x*(0,1)^(4*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power4(x *E2) *E2 { + b := fp.Element{ + 9781369407549005451, + 11405329014689439332, + 9526112206736809166, + 17199474236282616577, + 8603335129369500819, + 
227123553085123904, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue2Power5 set z=x*(0,1)^(5*(p^2-1)/6) and return z +func (z *E2) MulByNonResidue2Power5(x *E2) *E2 { + b := fp.Element{ + 11262734826581843530, + 3004477389852450365, + 16768292293353627483, + 7585049584469200436, + 3513521910780685392, + 255622228627568539, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power1 set z=x*(0,1)^(1*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power1(x *E2) *E2 { + b := fp.Element{ + 6315024805150803022, + 16048962212196301574, + 10554832649293981783, + 14109148363171599309, + 4153042273623539198, + 250647462785784749, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power2 set z=x*(0,1)^(2*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power2(x *E2) *E2 { + b := fp.Element{ + 9563890787977003074, + 4840746681246416935, + 3714448202430192371, + 680864871707381747, + 11127835353457883110, + 254858945967818549, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power3 set z=x*(0,1)^(3*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power3(x *E2) *E2 { + b := fp.Element{ + 4730231401859038131, + 17284420991632229626, + 401795639753028903, + 13850780004141469529, + 1884979861245528483, + 32710158724478435, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} + +// MulByNonResidue3Power5 set z=x*(0,1)^(5*(p^3-1)/6) and return z +func (z *E2) MulByNonResidue3Power5(x *E2) *E2 { + b := fp.Element{ + 6315024805150803022, + 16048962212196301574, + 10554832649293981783, + 14109148363171599309, + 4153042273623539198, + 250647462785784749, + } + z.A0.Mul(&x.A0, &b) + z.A1.Mul(&x.A1, &b) + return z +} diff --git a/ecc/bls12-378/internal/fptower/generators_test.go b/ecc/bls12-378/internal/fptower/generators_test.go new file mode 100644 index 000000000..b735007c2 --- /dev/null +++ 
b/ecc/bls12-378/internal/fptower/generators_test.go @@ -0,0 +1,51 @@ +package fptower + +import ( + "crypto/rand" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/leanovate/gopter" +) + +// Fp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + rand.Read(b[:]) + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// E2 generates an E2 elmt +func GenE2() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + ).Map(func(values []interface{}) *E2 { + return &E2{A0: values[0].(fp.Element), A1: values[1].(fp.Element)} + }) +} + +// E6 generates an E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE2(), + GenE2(), + GenE2(), + ).Map(func(values []interface{}) *E6 { + return &E6{B0: *values[0].(*E2), B1: *values[1].(*E2), B2: *values[2].(*E2)} + }) +} + +// E12 generates an E6 elmt +func GenE12() gopter.Gen { + return gopter.CombineGens( + GenE6(), + GenE6(), + ).Map(func(values []interface{}) *E12 { + return &E12{C0: *values[0].(*E6), C1: *values[1].(*E6)} + }) +} diff --git a/ecc/bls12-378/marshal.go b/ecc/bls12-378/marshal.go new file mode 100644 index 000000000..0411ca061 --- /dev/null +++ b/ecc/bls12-378/marshal.go @@ -0,0 +1,1160 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "encoding/binary" + "errors" + "io" + "reflect" + "sync/atomic" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" + "github.com/consensys/gnark-crypto/internal/parallel" +) + +// To encode G1Affine and G2Affine points, we mask the most significant bits with these bits to specify without ambiguity +// metadata needed for point (de)compression +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. 
+const ( + mMask byte = 0b111 << 5 + mUncompressed byte = 0b000 << 5 + mUncompressedInfinity byte = 0b010 << 5 + mCompressedSmallest byte = 0b100 << 5 + mCompressedLargest byte = 0b101 << 5 + mCompressedInfinity byte = 0b110 << 5 +) + +// SizeOfGT represents the size in bytes that a GT element need in binary form +const SizeOfGT = fptower.SizeOfGT + +// Encoder writes bls12-378 object values to an output stream +type Encoder struct { + w io.Writer + n int64 // written bytes + raw bool // raw vs compressed encoding +} + +// Decoder reads bls12-378 object values from an inbound stream +type Decoder struct { + r io.Reader + n int64 // read bytes + subGroupCheck bool // default to true +} + +// NewDecoder returns a binary decoder supporting curve bls12-378 objects in both +// compressed and uncompressed (raw) forms +func NewDecoder(r io.Reader, options ...func(*Decoder)) *Decoder { + d := &Decoder{r: r, subGroupCheck: true} + + for _, o := range options { + o(d) + } + + return d +} + +// Decode reads the binary encoding of v from the stream +// type must be *uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, *[]G1Affine or *[]G2Affine +func (dec *Decoder) Decode(v interface{}) (err error) { + rv := reflect.ValueOf(v) + if rv.Kind() != reflect.Ptr || rv.IsNil() || !rv.Elem().CanSet() { + return errors.New("bls12-378 decoder: unsupported type, need pointer") + } + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // in particular, careful attention must be given to usage of Bytes() method on Elements and Points + // that return an array (not a slice) of bytes. Using this is beneficial to minimize memallocs + // in very large (de)serialization upstream in gnark. 
+ // (but detrimental to code lisibility here) + // TODO double check memory usage and factorize this + + var buf [SizeOfG2AffineUncompressed]byte + var read int + + switch t := v.(type) { + case *fr.Element: + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + t.SetBytes(buf[:fr.Bytes]) + return + case *fp.Element: + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + t.SetBytes(buf[:fp.Bytes]) + return + case *[]fr.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fr.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fr.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fr.Bytes]) + } + return + case *[]fp.Element: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]fp.Element, sliceLen) + } + + for i := 0; i < len(*t); i++ { + read, err = io.ReadFull(dec.r, buf[:fp.Bytes]) + dec.n += int64(read) + if err != nil { + return + } + (*t)[i].SetBytes(buf[:fp.Bytes]) + } + return + case *G1Affine: + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *G2Affine: + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. 
+ read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. + read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + } + _, err = t.setBytes(buf[:nbBytes], dec.subGroupCheck) + return + case *[]G1Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G1Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG1AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG1AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG1AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG1AffineCompressed:SizeOfG1AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + case *[]G2Affine: + var sliceLen uint32 + sliceLen, err = dec.readUint32() + if err != nil { + return + } + if len(*t) != int(sliceLen) { + *t = make([]G2Affine, sliceLen) + } + compressed := make([]bool, sliceLen) + for i := 0; i < len(*t); i++ { + + // we start by reading compressed point size, if metadata tells us it is uncompressed, we read more. + read, err = io.ReadFull(dec.r, buf[:SizeOfG2AffineCompressed]) + dec.n += int64(read) + if err != nil { + return + } + nbBytes := SizeOfG2AffineCompressed + // most significant byte contains metadata + if !isCompressed(buf[0]) { + nbBytes = SizeOfG2AffineUncompressed + // we read more. 
+ read, err = io.ReadFull(dec.r, buf[SizeOfG2AffineCompressed:SizeOfG2AffineUncompressed]) + dec.n += int64(read) + if err != nil { + return + } + _, err = (*t)[i].setBytes(buf[:nbBytes], false) + if err != nil { + return + } + } else { + compressed[i] = !((*t)[i].unsafeSetCompressedBytes(buf[:nbBytes])) + } + } + var nbErrs uint64 + parallel.Execute(len(compressed), func(start, end int) { + for i := start; i < end; i++ { + if compressed[i] { + if err := (*t)[i].unsafeComputeY(dec.subGroupCheck); err != nil { + atomic.AddUint64(&nbErrs, 1) + } + } else if dec.subGroupCheck { + if !(*t)[i].IsInSubGroup() { + atomic.AddUint64(&nbErrs, 1) + } + } + } + }) + if nbErrs != 0 { + return errors.New("point decompression failed") + } + + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New("bls12-378 encoder: unsupported type") + } + err = binary.Read(dec.r, binary.BigEndian, t) + if err == nil { + dec.n += int64(n) + } + return + } +} + +// BytesRead return total bytes read from reader +func (dec *Decoder) BytesRead() int64 { + return dec.n +} + +func (dec *Decoder) readUint32() (r uint32, err error) { + var read int + var buf [4]byte + read, err = io.ReadFull(dec.r, buf[:4]) + dec.n += int64(read) + if err != nil { + return + } + r = binary.BigEndian.Uint32(buf[:4]) + return +} + +func isCompressed(msb byte) bool { + mData := msb & mMask + return !((mData == mUncompressed) || (mData == mUncompressedInfinity)) +} + +// NewEncoder returns a binary encoder supporting curve bls12-378 objects +func NewEncoder(w io.Writer, options ...func(*Encoder)) *Encoder { + // default settings + enc := &Encoder{ + w: w, + n: 0, + raw: false, + } + + // handle options + for _, option := range options { + option(enc) + } + + return enc +} + +// Encode writes the binary encoding of v to the stream +// type must be uint64, *fr.Element, *fp.Element, *G1Affine, *G2Affine, []G1Affine or []G2Affine +func (enc *Encoder) Encode(v interface{}) (err error) { + if enc.raw { + 
return enc.encodeRaw(v) + } + return enc.encode(v) +} + +// BytesWritten return total bytes written on writer +func (enc *Encoder) BytesWritten() int64 { + return enc.n +} + +// RawEncoding returns an option to use in NewEncoder(...) which sets raw encoding mode to true +// points will not be compressed using this option +func RawEncoding() func(*Encoder) { + return func(enc *Encoder) { + enc.raw = true + } +} + +// NoSubgroupChecks returns an option to use in NewDecoder(...) which disable subgroup checks on the points +// the decoder will read. Use with caution, as crafted points from an untrusted source can lead to crypto-attacks. +func NoSubgroupChecks() func(*Decoder) { + return func(dec *Decoder) { + dec.subGroupCheck = false + } +} + +func (enc *Encoder) encode(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < 
len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineCompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +func (enc *Encoder) encodeRaw(v interface{}) (err error) { + + // implementation note: code is a bit verbose (abusing code generation), but minimize allocations on the heap + // TODO double check memory usage and factorize this + + var written int + switch t := v.(type) { + case *fr.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *fp.Element: + buf := t.Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G1Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case *G2Affine: + buf := t.RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + return + case []fr.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fr.Bytes]byte + for i := 0; i < len(t); i++ { + buf = 
t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []fp.Element: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + var buf [fp.Bytes]byte + for i := 0; i < len(t); i++ { + buf = t[i].Bytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + + case []G1Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG1AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + case []G2Affine: + // write slice length + err = binary.Write(enc.w, binary.BigEndian, uint32(len(t))) + if err != nil { + return + } + enc.n += 4 + + var buf [SizeOfG2AffineUncompressed]byte + + for i := 0; i < len(t); i++ { + buf = t[i].RawBytes() + written, err = enc.w.Write(buf[:]) + enc.n += int64(written) + if err != nil { + return + } + } + return nil + default: + n := binary.Size(t) + if n == -1 { + return errors.New(" encoder: unsupported type") + } + err = binary.Write(enc.w, binary.BigEndian, t) + enc.n += int64(n) + return + } +} + +// SizeOfG1AffineCompressed represents the size in bytes that a G1Affine need in binary form, compressed +const SizeOfG1AffineCompressed = 48 + +// SizeOfG1AffineUncompressed represents the size in bytes that a G1Affine need in binary form, uncompressed +const SizeOfG1AffineUncompressed = SizeOfG1AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G1Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (p *G1Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes 
returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G1Affine) Bytes() (res [SizeOfG1AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) +// see Bytes() for a compressed representation +func (p *G1Affine) RawBytes() (res [SizeOfG1AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery 
representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + tmp = p.Y + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + + // we store X and mask the most significant word with our metadata mask + tmp = p.X + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short io.ErrShortBuffer is returned +// if buf contains compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e the square root doesn't exist) this function retunrs an error +// this check if the resulting point is on the curve and in the correct subgroup +func (p *G1Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G1Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG1AffineCompressed { + return 0, io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG1AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineCompressed, 
nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG1AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + p.X.SetBytes(buf[:fp.Bytes]) + p.Y.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG1AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G1Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fp.Element + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bCurveCoeff) + if Y.Sqrt(&YSquared) == nil { + return errors.New("invalid compressed coordinate: 
square root doesn't exist") + } + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is infinity and need no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G1Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + p.X.SetBytes(bufX[:fp.Bytes]) + // store mData in p.Y[0] + p.Y[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} + +// SizeOfG2AffineCompressed represents the size in bytes that a G2Affine need in binary form, compressed +const SizeOfG2AffineCompressed = 48 * 2 + +// SizeOfG2AffineUncompressed represents the size in bytes that a G2Affine need in binary form, uncompressed +const SizeOfG2AffineUncompressed = SizeOfG2AffineCompressed * 2 + +// Marshal converts p to a byte slice (without point compression) +func (p *G2Affine) Marshal() []byte { + b := p.RawBytes() + return b[:] +} + +// Unmarshal is an allias to SetBytes() +func (p *G2Affine) Unmarshal(buf []byte) error { + _, err := p.SetBytes(buf) + return err +} + +// Bytes returns binary representation of p +// will store X coordinate in regular form and a parity bit +// we follow the 
BLS12-381 style encoding as specified in ZCash and now IETF +// The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form. +// The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero. +// The third-most significant bit is set if (and only if) this point is in compressed form and it is not the point at infinity and its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate. +func (p *G2Affine) Bytes() (res [SizeOfG2AffineCompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + res[0] = mCompressedInfinity + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + msbMask := mCompressedSmallest + // compressed, we need to know if Y is lexicographically bigger than -Y + // if p.Y ">" -p.Y + if p.Y.LexicographicallyLargest() { + msbMask = mCompressedLargest + } + + // we store X and mask the most significant word with our metadata mask + // p.X.A1 | p.X.A0 + tmp = p.X.A0 + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + + tmp = p.X.A1 + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + res[0] |= msbMask + + return +} + +// RawBytes returns binary representation of p (stores X and Y coordinate) +// see Bytes() for a compressed representation +func (p 
*G2Affine) RawBytes() (res [SizeOfG2AffineUncompressed]byte) { + + // check if p is infinity point + if p.X.IsZero() && p.Y.IsZero() { + + res[0] = mUncompressedInfinity + + return + } + + // tmp is used to convert from montgomery representation to regular + var tmp fp.Element + + // not compressed + // we store the Y coordinate + // p.Y.A1 | p.Y.A0 + tmp = p.Y.A0 + tmp.FromMont() + binary.BigEndian.PutUint64(res[184:192], tmp[0]) + binary.BigEndian.PutUint64(res[176:184], tmp[1]) + binary.BigEndian.PutUint64(res[168:176], tmp[2]) + binary.BigEndian.PutUint64(res[160:168], tmp[3]) + binary.BigEndian.PutUint64(res[152:160], tmp[4]) + binary.BigEndian.PutUint64(res[144:152], tmp[5]) + + tmp = p.Y.A1 + tmp.FromMont() + binary.BigEndian.PutUint64(res[136:144], tmp[0]) + binary.BigEndian.PutUint64(res[128:136], tmp[1]) + binary.BigEndian.PutUint64(res[120:128], tmp[2]) + binary.BigEndian.PutUint64(res[112:120], tmp[3]) + binary.BigEndian.PutUint64(res[104:112], tmp[4]) + binary.BigEndian.PutUint64(res[96:104], tmp[5]) + + // we store X and mask the most significant word with our metadata mask + // p.X.A1 | p.X.A0 + tmp = p.X.A1 + tmp.FromMont() + binary.BigEndian.PutUint64(res[40:48], tmp[0]) + binary.BigEndian.PutUint64(res[32:40], tmp[1]) + binary.BigEndian.PutUint64(res[24:32], tmp[2]) + binary.BigEndian.PutUint64(res[16:24], tmp[3]) + binary.BigEndian.PutUint64(res[8:16], tmp[4]) + binary.BigEndian.PutUint64(res[0:8], tmp[5]) + + tmp = p.X.A0 + tmp.FromMont() + binary.BigEndian.PutUint64(res[88:96], tmp[0]) + binary.BigEndian.PutUint64(res[80:88], tmp[1]) + binary.BigEndian.PutUint64(res[72:80], tmp[2]) + binary.BigEndian.PutUint64(res[64:72], tmp[3]) + binary.BigEndian.PutUint64(res[56:64], tmp[4]) + binary.BigEndian.PutUint64(res[48:56], tmp[5]) + + res[0] |= mUncompressed + + return +} + +// SetBytes sets p from binary representation in buf and returns number of consumed bytes +// bytes in buf must match either RawBytes() or Bytes() output +// if buf is too short 
io.ErrShortBuffer is returned +// if buf contains compressed representation (output from Bytes()) and we're unable to compute +// the Y coordinate (i.e. the square root doesn't exist) this function returns an error +// it also checks that the resulting point is on the curve and in the correct subgroup +func (p *G2Affine) SetBytes(buf []byte) (int, error) { + return p.setBytes(buf, true) +} + +func (p *G2Affine) setBytes(buf []byte, subGroupCheck bool) (int, error) { + if len(buf) < SizeOfG2AffineCompressed { + return 0, io.ErrShortBuffer + } + + // most significant byte + mData := buf[0] & mMask + + // check buffer size + if (mData == mUncompressed) || (mData == mUncompressedInfinity) { + if len(buf) < SizeOfG2AffineUncompressed { + return 0, io.ErrShortBuffer + } + } + + // if infinity is encoded in the metadata, we don't need to read the buffer + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineCompressed, nil + } + if mData == mUncompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + return SizeOfG2AffineUncompressed, nil + } + + // uncompressed point + if mData == mUncompressed { + // read X and Y coordinates + // p.X.A1 | p.X.A0 + p.X.A1.SetBytes(buf[:fp.Bytes]) + p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + // p.Y.A1 | p.Y.A0 + p.Y.A1.SetBytes(buf[fp.Bytes*2 : fp.Bytes*3]) + p.Y.A0.SetBytes(buf[fp.Bytes*3 : fp.Bytes*4]) + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineUncompressed, nil + } + + // we have a compressed coordinate + // we need to + // 1. copy the buffer (to keep this method thread safe) + // 2. 
we need to solve the curve equation to compute Y + + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + // p.X.A1 | p.X.A0 + p.X.A1.SetBytes(bufX[:fp.Bytes]) + p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + var YSquared, Y fptower.E2 + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if YSquared.Legendre() == -1 { + return 0, errors.New("invalid compressed coordinate: square root doesn't exist") + } + Y.Sqrt(&YSquared) + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return 0, errors.New("invalid point: subgroup check failed") + } + + return SizeOfG2AffineCompressed, nil +} + +// unsafeComputeY called by Decoder when processing slices of compressed point in parallel (step 2) +// it computes the Y coordinate from the already set X coordinate and is compute intensive +func (p *G2Affine) unsafeComputeY(subGroupCheck bool) error { + // stored in unsafeSetCompressedBytes + + mData := byte(p.Y.A0[0]) + + // we have a compressed coordinate, we need to solve the curve equation to compute Y + var YSquared, Y fptower.E2 + + YSquared.Square(&p.X).Mul(&YSquared, &p.X) + YSquared.Add(&YSquared, &bTwistCurveCoeff) + if YSquared.Legendre() == -1 { + return errors.New("invalid compressed coordinate: square root doesn't exist") + } + Y.Sqrt(&YSquared) + + if Y.LexicographicallyLargest() { + // Y ">" -Y + if mData == mCompressedSmallest { + Y.Neg(&Y) + } + } else { + // Y "<=" -Y + if mData == mCompressedLargest { + Y.Neg(&Y) + } + } + + p.Y = Y + + // subgroup check + if subGroupCheck && !p.IsInSubGroup() { + return errors.New("invalid point: subgroup check failed") + } + + return nil +} + +// unsafeSetCompressedBytes is called by Decoder when processing slices of 
compressed point in parallel (step 1) +// assumes buf[:8] mask is set to compressed +// returns true if point is infinity and need no further processing +// it sets X coordinate and uses Y for scratch space to store decompression metadata +func (p *G2Affine) unsafeSetCompressedBytes(buf []byte) (isInfinity bool) { + + // read the most significant byte + mData := buf[0] & mMask + + if mData == mCompressedInfinity { + p.X.SetZero() + p.Y.SetZero() + isInfinity = true + return + } + + // we need to copy the input buffer (to keep this method thread safe) + var bufX [fp.Bytes]byte + copy(bufX[:fp.Bytes], buf[:fp.Bytes]) + bufX[0] &= ^mMask + + // read X coordinate + // p.X.A1 | p.X.A0 + p.X.A1.SetBytes(bufX[:fp.Bytes]) + p.X.A0.SetBytes(buf[fp.Bytes : fp.Bytes*2]) + + // store mData in p.Y.A0[0] + p.Y.A0[0] = uint64(mData) + + // recomputing Y will be done asynchronously + return +} diff --git a/ecc/bls12-378/marshal_test.go b/ecc/bls12-378/marshal_test.go new file mode 100644 index 000000000..3fc6a5f12 --- /dev/null +++ b/ecc/bls12-378/marshal_test.go @@ -0,0 +1,467 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "bytes" + "io" + "math/big" + "math/rand" + "testing" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +func TestEncoder(t *testing.T) { + + // TODO need proper fuzz testing here + + var inA uint64 + var inB fr.Element + var inC fp.Element + var inD G1Affine + var inE G1Affine + var inF G2Affine + var inG []G1Affine + var inH []G2Affine + var inI []fp.Element + var inJ []fr.Element + + // set values of inputs + inA = rand.Uint64() + inB.SetRandom() + inC.SetRandom() + inD.ScalarMultiplication(&g1GenAff, new(big.Int).SetUint64(rand.Uint64())) + // inE --> infinity + inF.ScalarMultiplication(&g2GenAff, new(big.Int).SetUint64(rand.Uint64())) + inG = make([]G1Affine, 2) + inH = make([]G2Affine, 0) + inG[1] = inD + inI = make([]fp.Element, 3) + inI[2] = inD.X + inJ = make([]fr.Element, 0) + + // encode them, compressed and raw + var buf, bufRaw bytes.Buffer + enc := NewEncoder(&buf) + encRaw := NewEncoder(&bufRaw, RawEncoding()) + toEncode := []interface{}{inA, &inB, &inC, &inD, &inE, &inF, inG, inH, inI, inJ} + for _, v := range toEncode { + if err := enc.Encode(v); err != nil { + t.Fatal(err) + } + if err := encRaw.Encode(v); err != nil { + t.Fatal(err) + } + } + + testDecode := func(t *testing.T, r io.Reader, n int64) { + dec := NewDecoder(r) + var outA uint64 + var outB fr.Element + var outC fp.Element + var outD G1Affine + var outE G1Affine + outE.X.SetOne() + outE.Y.SetUint64(42) + var outF G2Affine + var outG []G1Affine + var outH []G2Affine + var outI []fp.Element + var outJ []fr.Element + + toDecode := []interface{}{&outA, &outB, &outC, &outD, &outE, &outF, &outG, &outH, &outI, &outJ} + for _, v := range toDecode { + if err := dec.Decode(v); err != nil { + 
t.Fatal(err) + } + } + + // compare values + if inA != outA { + t.Fatal("didn't encode/decode uint64 value properly") + } + + if !inB.Equal(&outB) || !inC.Equal(&outC) { + t.Fatal("decode(encode(Element) failed") + } + if !inD.Equal(&outD) || !inE.Equal(&outE) { + t.Fatal("decode(encode(G1Affine) failed") + } + if !inF.Equal(&outF) { + t.Fatal("decode(encode(G2Affine) failed") + } + if (len(inG) != len(outG)) || (len(inH) != len(outH)) { + t.Fatal("decode(encode(slice(points))) failed") + } + for i := 0; i < len(inG); i++ { + if !inG[i].Equal(&outG[i]) { + t.Fatal("decode(encode(slice(points))) failed") + } + } + if (len(inI) != len(outI)) || (len(inJ) != len(outJ)) { + t.Fatal("decode(encode(slice(elements))) failed") + } + for i := 0; i < len(inI); i++ { + if !inI[i].Equal(&outI[i]) { + t.Fatal("decode(encode(slice(elements))) failed") + } + } + if n != dec.BytesRead() { + t.Fatal("bytes read don't match bytes written") + } + } + + // decode them + testDecode(t, &buf, enc.BytesWritten()) + testDecode(t, &bufRaw, encRaw.BytesWritten()) + +} + +func TestIsCompressed(t *testing.T) { + var g1Inf, g1 G1Affine + var g2Inf, g2 G2Affine + + g1 = g1GenAff + g2 = g2GenAff + + { + b := g1Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1Inf.Bytes() should be compressed") + } + } + + { + b := g1Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1Inf.RawBytes() should be uncompressed") + } + } + + { + b := g1.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g1.Bytes() should be compressed") + } + } + + { + b := g1.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g1.RawBytes() should be uncompressed") + } + } + + { + b := g2Inf.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2Inf.Bytes() should be compressed") + } + } + + { + b := g2Inf.RawBytes() + if isCompressed(b[0]) { + t.Fatal("g2Inf.RawBytes() should be uncompressed") + } + } + + { + b := g2.Bytes() + if !isCompressed(b[0]) { + t.Fatal("g2.Bytes() should be compressed") + } + } + + { + b := g2.RawBytes() + if 
isCompressed(b[0]) { + t.Fatal("g2.RawBytes() should be uncompressed") + } + } + +} + +func TestG1AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G1Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG1AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G1] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG1AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G1] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G1Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g1GenAff, &ab) + + buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != 
SizeOfG1AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestG2AffineSerialization(t *testing.T) { + + // test round trip serialization of infinity + { + // compressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.Bytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineCompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + + // uncompressed + { + var p1, p2 G2Affine + p2.X.SetRandom() + p2.Y.SetRandom() + buf := p1.RawBytes() + n, err := p2.SetBytes(buf[:]) + if err != nil { + t.Fatal(err) + } + if n != SizeOfG2AffineUncompressed { + t.Fatal("invalid number of bytes consumed in buffer") + } + if !(p2.X.IsZero() && p2.Y.IsZero()) { + t.Fatal("deserialization of uncompressed infinity point is not infinity") + } + } + } + + parameters := gopter.DefaultTestParameters() + if testing.Short() { + parameters.MinSuccessfulTests = 100 + } else { + parameters.MinSuccessfulTests = 1000 + } + + properties := gopter.NewProperties(parameters) + + properties.Property("[G2] Affine SetBytes(RawBytes) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + + buf := start.RawBytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineUncompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.Property("[G2] Affine SetBytes(Bytes()) should stay the same", prop.ForAll( + func(a fp.Element) bool { + var start, end G2Affine + var ab big.Int + a.ToBigIntRegular(&ab) + start.ScalarMultiplication(&g2GenAff, &ab) + 
+ buf := start.Bytes() + n, err := end.SetBytes(buf[:]) + if err != nil { + return false + } + if n != SizeOfG2AffineCompressed { + return false + } + return start.X.Equal(&end.X) && start.Y.Equal(&end.Y) + }, + GenFp(), + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// define Gopters generators + +// GenFr generates an Fr element +func GenFr() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fr.Element + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenFp generates an Fp element +func GenFp() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var elmt fp.Element + var b [fp.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + elmt.SetBytes(b[:]) + genResult := gopter.NewGenResult(elmt, gopter.NoShrinker) + return genResult + } +} + +// GenE2 generates an fptower.E2 elmt +func GenE2() gopter.Gen { + return gopter.CombineGens( + GenFp(), + GenFp(), + ).Map(func(values []interface{}) fptower.E2 { + return fptower.E2{A0: values[0].(fp.Element), A1: values[1].(fp.Element)} + }) +} + +// GenE6 generates an fptower.E6 elmt +func GenE6() gopter.Gen { + return gopter.CombineGens( + GenE2(), + GenE2(), + GenE2(), + ).Map(func(values []interface{}) fptower.E6 { + return fptower.E6{B0: values[0].(fptower.E2), B1: values[1].(fptower.E2), B2: values[2].(fptower.E2)} + }) +} + +// GenE12 generates an fptower.E6 elmt +func GenE12() gopter.Gen { + return gopter.CombineGens( + GenE6(), + GenE6(), + ).Map(func(values []interface{}) fptower.E12 { + return fptower.E12{C0: values[0].(fptower.E6), C1: values[1].(fptower.E6)} + }) +} + +// GenBigInt generates a big.Int +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fp.Bytes]byte + _, err := 
rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} diff --git a/ecc/bls12-378/multiexp.go b/ecc/bls12-378/multiexp.go new file mode 100644 index 000000000..e9203d0d0 --- /dev/null +++ b/ecc/bls12-378/multiexp.go @@ -0,0 +1,2303 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "errors" + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/internal/parallel" + "math" + "runtime" +) + +// selector stores the index, mask and shifts needed to select bits from a scalar +// it is used during the multiExp algorithm or the batch scalar multiplication +type selector struct { + index uint64 // index in the multi-word scalar to select bits from + mask uint64 // mask (c-bit wide) + shift uint64 // shift needed to get our bits on low positions + + multiWordSelect bool // set to true if we need to select bits from 2 words (case where c doesn't divide 64) + maskHigh uint64 // same than mask, for index+1 + shiftHigh uint64 // same than shift, for index+1 +} + +// partitionScalars compute, for each scalars over c-bit wide windows, nbChunk digits +// if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract +// 2^{c} to the 
current digit, making it negative. +// negative digits can be processed in a later step as adding -G into the bucket instead of G +// (computing -G is cheap, and this saves us half of the buckets in the MultiExp or BatchScalarMul) +// scalarsMont indicates wheter the provided scalars are in montgomery form +// returns smallValues, which represent the number of scalars which meets the following condition +// 0 < scalar < 2^c (in other words, scalars where only the c-least significant bits are non zero) +func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks int) ([]fr.Element, int) { + toReturn := make([]fr.Element, len(scalars)) + + // number of c-bit radixes in a scalar + nbChunks := fr.Limbs * 64 / c + if (fr.Limbs*64)%c != 0 { + nbChunks++ + } + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) // msb of the c-bit window + max := int(1 << (c - 1)) // max value we want for our digits + cDivides64 := (64 % c) == 0 // if c doesn't divide 64, we may need to select over multiple words + + // compute offset and word selector / shift to select the right bits of our windows + selectors := make([]selector, nbChunks) + for chunk := uint64(0); chunk < nbChunks; chunk++ { + jc := uint64(chunk * c) + d := selector{} + d.index = jc / 64 + d.shift = jc - (d.index * 64) + d.mask = mask << d.shift + d.multiWordSelect = !cDivides64 && d.shift > (64-c) && d.index < (fr.Limbs-1) + if d.multiWordSelect { + nbBitsHigh := d.shift - uint64(64-c) + d.maskHigh = (1 << nbBitsHigh) - 1 + d.shiftHigh = (c - nbBitsHigh) + } + selectors[chunk] = d + } + + // for each chunk, we could track the number of non-zeros points we will need to process + // this way, if a chunk has more work to do than others, we can spawn off more go routines + // (at the cost of more buckets allocated) + // a simplified approach is to track the small values where only the first word is set + // if this number represent a significant number of points, then 
we will split first chunk + // processing in the msm in 2, to ensure all go routines finish at ~same time + // /!\ nbTasks is enough as parallel.Execute is not going to spawn more than nbTasks go routine + // if it does, though, this will deadlocK. + chSmallValues := make(chan int, nbTasks) + + parallel.Execute(len(scalars), func(start, end int) { + smallValues := 0 + for i := start; i < end; i++ { + var carry int + + scalar := scalars[i] + if scalarsMont { + scalar.FromMont() + } + if scalar.IsUint64() { + // everything is 0, no need to process this scalar + if scalar[0] == 0 { + continue + } + // low c-bits are 1 in mask + if scalar[0]&mask == scalar[0] { + smallValues++ + } + } + + // for each chunk in the scalar, compute the current digit, and an eventual carry + for chunk := uint64(0); chunk < nbChunks; chunk++ { + s := selectors[chunk] + + // init with carry if any + digit := carry + carry = 0 + + // digit = value of the c-bit window + digit += int((scalar[s.index] & s.mask) >> s.shift) + + if s.multiWordSelect { + // we are selecting bits over 2 words + digit += int(scalar[s.index+1]&s.maskHigh) << s.shiftHigh + } + + // if digit is zero, no impact on result + if digit == 0 { + continue + } + + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and substract + // 2^{c} to the current digit, making it negative. 
+ if digit >= max { + digit -= (1 << c) + carry = 1 + } + + var bits uint64 + if digit >= 0 { + bits = uint64(digit) + } else { + bits = uint64(-digit-1) | msbWindow + } + + toReturn[i][s.index] |= (bits << s.shift) + if s.multiWordSelect { + toReturn[i][s.index+1] |= (bits >> s.shiftHigh) + } + + } + } + + chSmallValues <- smallValues + + }, nbTasks) + + // aggregate small values + close(chSmallValues) + smallValues := 0 + for o := range chSmallValues { + smallValues += o + } + return toReturn, smallValues +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Affine) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Affine, error) { + var _p G1Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G1Jac) MultiExp(points []G1Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G1Jac, error) { + // note: + // each of the msmCX methods is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoretical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point doesn't. + + // for each msmCX + // step 1 + // we compute, for each scalar over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and subtract + // 2^{c} from the current digit, making it negative. 
+ // negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets based on their selector and returns the weighted bucket sum in given channel + // step 3 + // reduce the buckets' weighted sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. 
+ // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG1Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G1Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG1Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG1Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG1Jac(p *G1Jac, c int, points []G1Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 6: + p.msmC6(points, scalars, splitFirstChunk) + + case 7: + p.msmC7(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 9: + p.msmC9(points, scalars, splitFirstChunk) + + case 10: + p.msmC10(points, scalars, splitFirstChunk) + + case 11: + p.msmC11(points, scalars, splitFirstChunk) + + case 12: + p.msmC12(points, scalars, splitFirstChunk) + + case 13: + p.msmC13(points, scalars, splitFirstChunk) + + case 14: + p.msmC14(points, scalars, splitFirstChunk) + + case 15: + p.msmC15(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + case 20: + p.msmC20(points, scalars, splitFirstChunk) + + case 21: + p.msmC21(points, scalars, splitFirstChunk) + + case 22: + p.msmC22(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG1Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG1Affine(p *G1Jac, c int, chChunks []chan g1JacExtended) *G1Jac { + var _p g1JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := 
<-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG1Affine(chunk uint64, + chRes chan<- g1JacExtended, + buckets []g1JacExtended, + c uint64, + points []G1Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... 
+ n*bucket[n-1] + + var runningSum, total g1JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G1Jac) msmC4(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC5(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop 
through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC6(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 6 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // 
each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC7(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 7 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + 
} + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC8(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, 
buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC9(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 9 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, 
points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC10(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 10 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go 
processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC11(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 11 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return 
msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC12(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 12 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC13(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 13 // scalars partitioned 
into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC14(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 14 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // 
corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC15(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 15 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result 
in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC16(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + processChunk := func(j int, 
points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC20(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 20 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, 
scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC21(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 21 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, 
chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +func (p *G1Jac) msmC22(points []G1Affine, scalars []fr.Element, splitFirstChunk bool) *G1Jac { + const ( + c = 22 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g1JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g1JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G1Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g1JacExtended + msmProcessChunkG1Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G1Affine, scalars []fr.Element, chChunk chan g1JacExtended) { + var buckets [1 << (c - 1)]g1JacExtended + msmProcessChunkG1Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g1JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, 
points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG1Affine(p, c, chChunks[:]) +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Affine) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Affine, error) { + var _p G2Jac + if _, err := _p.MultiExp(points, scalars, config); err != nil { + return nil, err + } + p.FromJacobian(&_p) + return p, nil +} + +// MultiExp implements section 4 of https://eprint.iacr.org/2012/549.pdf +func (p *G2Jac) MultiExp(points []G2Affine, scalars []fr.Element, config ecc.MultiExpConfig) (*G2Jac, error) { + // note: + // each of the msmCX methods is the same, except for the c constant it declares + // duplicating (through template generation) these methods allows to declare the buckets on the stack + // the choice of c needs to be improved: + // there is a theoretical value that gives optimal asymptotics + // but in practice, other factors come into play, including: + // * if c doesn't divide 64, the word size, then we're bound to select bits over 2 words of our scalars, instead of 1 + // * number of CPUs + // * cache friendliness (which depends on the host, G1 or G2... ) + // --> for example, on BN254, a G1 point fits into one cache line of 64bytes, but a G2 point doesn't. + + // for each msmCX + // step 1 + // we compute, for each scalars over c-bit wide windows, nbChunk digits + // if the digit is larger than 2^{c-1}, then, we borrow 2^c from the next window and subtract + // 2^{c} to the current digit, making it negative. 
+ negative digits will be processed in the next step as adding -G into the bucket instead of G + // (computing -G is cheap, and this saves us half of the buckets) + // step 2 + // buckets are declared on the stack + // notice that we have 2^{c-1} buckets instead of 2^{c} (see step1) + // we use jacobian extended formulas here as they are faster than mixed addition + // msmProcessChunk places points into buckets based on their selector and returns the weighted bucket sum in the given channel + // step 3 + // reduce the buckets weighted sums into our result (msmReduceChunk) + + // ensure len(points) == len(scalars) + nbPoints := len(points) + if nbPoints != len(scalars) { + return nil, errors.New("len(points) != len(scalars)") + } + + // if nbTasks is not set, use all available CPUs + if config.NbTasks <= 0 { + config.NbTasks = runtime.NumCPU() + } + + // here, we compute the best C for nbPoints + // we split recursively until nbChunks(c) >= nbTasks, + bestC := func(nbPoints int) uint64 { + // implemented msmC methods (the c we use must be in this slice) + implementedCs := []uint64{4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 22} + var C uint64 + // approximate cost (in group operations) + // cost = bits/c * (nbPoints + 2^{c}) + // this needs to be verified empirically. + // for example, on a MBP 2016, for G2 MultiExp > 8M points, hand picking c gives better results + min := math.MaxFloat64 + for _, c := range implementedCs { + cc := fr.Limbs * 64 * (nbPoints + (1 << (c))) + cost := float64(cc) / float64(c) + if cost < min { + min = cost + C = c + } + } + // empirical, needs to be tuned. 
+ // if C > 16 && nbPoints < 1 << 23 { + // C = 16 + // } + return C + } + + var C uint64 + nbSplits := 1 + nbChunks := 0 + for nbChunks < config.NbTasks { + C = bestC(nbPoints) + nbChunks = int(fr.Limbs * 64 / C) // number of c-bit radixes in a scalar + if (fr.Limbs*64)%C != 0 { + nbChunks++ + } + nbChunks *= nbSplits + if nbChunks < config.NbTasks { + nbSplits <<= 1 + nbPoints >>= 1 + } + } + + // partition the scalars + // note: we do that before the actual chunk processing, as for each c-bit window (starting from LSW) + // if it's larger than 2^{c-1}, we have a carry we need to propagate up to the higher window + var smallValues int + scalars, smallValues = partitionScalars(scalars, C, config.ScalarsMont, config.NbTasks) + + // if we have more than 10% of small values, we split the processing of the first chunk in 2 + // we may want to do that in msmInnerG2Jac , but that would incur a cost of looping through all scalars one more time + splitFirstChunk := (float64(smallValues) / float64(len(scalars))) >= 0.1 + + // we have nbSplits intermediate results that we must sum together. 
+ _p := make([]G2Jac, nbSplits-1) + chDone := make(chan int, nbSplits-1) + for i := 0; i < nbSplits-1; i++ { + start := i * nbPoints + end := start + nbPoints + go func(start, end, i int) { + msmInnerG2Jac(&_p[i], int(C), points[start:end], scalars[start:end], splitFirstChunk) + chDone <- i + }(start, end, i) + } + + msmInnerG2Jac(p, int(C), points[(nbSplits-1)*nbPoints:], scalars[(nbSplits-1)*nbPoints:], splitFirstChunk) + for i := 0; i < nbSplits-1; i++ { + done := <-chDone + p.AddAssign(&_p[done]) + } + close(chDone) + return p, nil +} + +func msmInnerG2Jac(p *G2Jac, c int, points []G2Affine, scalars []fr.Element, splitFirstChunk bool) { + + switch c { + + case 4: + p.msmC4(points, scalars, splitFirstChunk) + + case 5: + p.msmC5(points, scalars, splitFirstChunk) + + case 6: + p.msmC6(points, scalars, splitFirstChunk) + + case 7: + p.msmC7(points, scalars, splitFirstChunk) + + case 8: + p.msmC8(points, scalars, splitFirstChunk) + + case 9: + p.msmC9(points, scalars, splitFirstChunk) + + case 10: + p.msmC10(points, scalars, splitFirstChunk) + + case 11: + p.msmC11(points, scalars, splitFirstChunk) + + case 12: + p.msmC12(points, scalars, splitFirstChunk) + + case 13: + p.msmC13(points, scalars, splitFirstChunk) + + case 14: + p.msmC14(points, scalars, splitFirstChunk) + + case 15: + p.msmC15(points, scalars, splitFirstChunk) + + case 16: + p.msmC16(points, scalars, splitFirstChunk) + + case 20: + p.msmC20(points, scalars, splitFirstChunk) + + case 21: + p.msmC21(points, scalars, splitFirstChunk) + + case 22: + p.msmC22(points, scalars, splitFirstChunk) + + default: + panic("not implemented") + } +} + +// msmReduceChunkG2Affine reduces the weighted sum of the buckets into the result of the multiExp +func msmReduceChunkG2Affine(p *G2Jac, c int, chChunks []chan g2JacExtended) *G2Jac { + var _p g2JacExtended + totalj := <-chChunks[len(chChunks)-1] + _p.Set(&totalj) + for j := len(chChunks) - 2; j >= 0; j-- { + for l := 0; l < c; l++ { + _p.double(&_p) + } + totalj := 
<-chChunks[j] + _p.add(&totalj) + } + + return p.unsafeFromJacExtended(&_p) +} + +func msmProcessChunkG2Affine(chunk uint64, + chRes chan<- g2JacExtended, + buckets []g2JacExtended, + c uint64, + points []G2Affine, + scalars []fr.Element) { + + mask := uint64((1 << c) - 1) // low c bits are 1 + msbWindow := uint64(1 << (c - 1)) + + for i := 0; i < len(buckets); i++ { + buckets[i].setInfinity() + } + + jc := uint64(chunk * c) + s := selector{} + s.index = jc / 64 + s.shift = jc - (s.index * 64) + s.mask = mask << s.shift + s.multiWordSelect = (64%c) != 0 && s.shift > (64-c) && s.index < (fr.Limbs-1) + if s.multiWordSelect { + nbBitsHigh := s.shift - uint64(64-c) + s.maskHigh = (1 << nbBitsHigh) - 1 + s.shiftHigh = (c - nbBitsHigh) + } + + // for each scalars, get the digit corresponding to the chunk we're processing. + for i := 0; i < len(scalars); i++ { + bits := (scalars[i][s.index] & s.mask) >> s.shift + if s.multiWordSelect { + bits += (scalars[i][s.index+1] & s.maskHigh) << s.shiftHigh + } + + if bits == 0 { + continue + } + + // if msbWindow bit is set, we need to substract + if bits&msbWindow == 0 { + // add + buckets[bits-1].addMixed(&points[i]) + } else { + // sub + buckets[bits & ^msbWindow].subMixed(&points[i]) + } + } + + // reduce buckets into total + // total = bucket[0] + 2*bucket[1] + 3*bucket[2] ... 
+ n*bucket[n-1] + + var runningSum, total g2JacExtended + runningSum.setInfinity() + total.setInfinity() + for k := len(buckets) - 1; k >= 0; k-- { + if !buckets[k].ZZ.IsZero() { + runningSum.add(&buckets[k]) + } + total.add(&runningSum) + } + + chRes <- total + +} + +func (p *G2Jac) msmC4(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 4 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC5(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 5 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop 
through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC6(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 6 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // 
each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC7(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 7 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + 
} + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC8(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 8 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, 
buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC9(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 9 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, 
points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC10(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 10 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go 
processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC11(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 11 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return 
msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC12(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 12 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC13(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 13 // scalars partitioned 
into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC14(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 14 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // 
corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC15(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 15 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result 
in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC16(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 16 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + processChunk := func(j int, 
points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC20(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 20 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, 
scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC21(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 21 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, 
chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} + +func (p *G2Jac) msmC22(points []G2Affine, scalars []fr.Element, splitFirstChunk bool) *G2Jac { + const ( + c = 22 // scalars partitioned into c-bit radixes + nbChunks = (fr.Limbs * 64 / c) // number of c-bit radixes in a scalar + ) + + // for each chunk, spawn one go routine that'll loop through all the scalars in the + // corresponding bit-window + // note that buckets is an array allocated on the stack (for most sizes of c) and this is + // critical for performance + + // each go routine sends its result in chChunks[i] channel + var chChunks [nbChunks + 1]chan g2JacExtended + for i := 0; i < len(chChunks); i++ { + chChunks[i] = make(chan g2JacExtended, 1) + } + + // c doesn't divide 256, last window is smaller we can allocate less buckets + const lastC = (fr.Limbs * 64) - (c * (fr.Limbs * 64 / c)) + go func(j uint64, points []G2Affine, scalars []fr.Element) { + var buckets [1 << (lastC - 1)]g2JacExtended + msmProcessChunkG2Affine(j, chChunks[j], buckets[:], c, points, scalars) + }(uint64(nbChunks), points, scalars) + + processChunk := func(j int, points []G2Affine, scalars []fr.Element, chChunk chan g2JacExtended) { + var buckets [1 << (c - 1)]g2JacExtended + msmProcessChunkG2Affine(uint64(j), chChunk, buckets[:], c, points, scalars) + } + + for j := int(nbChunks - 1); j > 0; j-- { + go processChunk(j, points, scalars, chChunks[j]) + } + + if !splitFirstChunk { + go processChunk(0, points, scalars, chChunks[0]) + } else { + chSplit := make(chan g2JacExtended, 2) + split := len(points) / 2 + go processChunk(0, points[:split], scalars[:split], chSplit) + go processChunk(0, 
points[split:], scalars[split:], chSplit) + go func() { + s1 := <-chSplit + s2 := <-chSplit + close(chSplit) + s1.add(&s2) + chChunks[0] <- s1 + }() + } + + return msmReduceChunkG2Affine(p, c, chChunks[:]) +} diff --git a/ecc/bls12-378/multiexp_test.go b/ecc/bls12-378/multiexp_test.go new file mode 100644 index 000000000..2ea2502d3 --- /dev/null +++ b/ecc/bls12-378/multiexp_test.go @@ -0,0 +1,1349 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "math/bits" + "runtime" + "sync" + "testing" + + "github.com/consensys/gnark-crypto/ecc" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +func TestMultiExpG1(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G1Affine + var g G1Jac + g.Set(&g1Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same result as a non-splitted one.. + properties.Property("[G1] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G1Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G1] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G1Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G1Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G1] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=6) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 6, false, runtime.NumCPU()) + result.msmC6(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=7) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 7, false, runtime.NumCPU()) + result.msmC7(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=9) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 9, false, runtime.NumCPU()) + result.msmC9(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=10) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 10, false, runtime.NumCPU()) + result.msmC10(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=11) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 11, false, runtime.NumCPU()) + result.msmC11(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=12) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 12, false, runtime.NumCPU()) + result.msmC12(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=13) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 13, false, runtime.NumCPU()) + result.msmC13(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=14) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 14, false, runtime.NumCPU()) + result.msmC14(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=15) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 15, false, runtime.NumCPU()) + result.msmC15(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=20) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 20, false, runtime.NumCPU()) + result.msmC20(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=21) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 21, false, runtime.NumCPU()) + result.msmC21(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G1] Multi exponentation (c=22) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G1Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 22, false, runtime.NumCPU()) + result.msmC22(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g1Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G1] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G1Jac + g.Set(&g1Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G1Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g1Gen) + } + + var op1MultiExp G1Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G1Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g1GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG1(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g1GenAff + } + + var testPoint G1Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG1Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G1Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g1GenAff + } + + var t1, t2, t3 G1Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} + +func TestMultiExpG2(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 2 + + properties := gopter.NewProperties(parameters) + + genScalar := GenFr() + + // size of the multiExps + const nbSamples = 143 + + // multi exp points + var samplePoints [nbSamples]G2Affine + var g G2Jac + g.Set(&g2Gen) + for i := 1; i <= nbSamples; i++ { + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + // final scalar to use in double and add method (without mixer factor) + // n(n+1)(2n+1)/6 (sum of the squares from 1 to n) + var scalar big.Int + scalar.SetInt64(nbSamples) + scalar.Mul(&scalar, new(big.Int).SetInt64(nbSamples+1)) + scalar.Mul(&scalar, new(big.Int).SetInt64(2*nbSamples+1)) + scalar.Div(&scalar, new(big.Int).SetInt64(6)) + + // ensure a multiexp that's splitted has the same 
result as a non-splitted one.. + properties.Property("[G2] Multi exponentation (c=16) should be consistant with splitted multiexp", prop.ForAll( + func(mixer fr.Element) bool { + var samplePointsLarge [nbSamples * 13]G2Affine + for i := 0; i < 13; i++ { + copy(samplePointsLarge[i*nbSamples:], samplePoints[:]) + } + + var r16, splitted1, splitted2 G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples * 13]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + r16.msmC16(samplePoints[:], scalars16, true) + + splitted1.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 128}) + splitted2.MultiExp(samplePointsLarge[:], sampleScalars[:], ecc.MultiExpConfig{NbTasks: 51}) + return r16.Equal(&splitted1) && r16.Equal(&splitted2) + }, + genScalar, + )) + + if testing.Short() { + // we test only c = 5 and c = 16 + + properties.Property("[G2] Multi exponentation (c=5, c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var expected G2Jac + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars5, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + scalars16, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + + var r5, r16 G2Jac + r5.msmC5(samplePoints[:], scalars5, false) + r16.msmC16(samplePoints[:], scalars16, true) + return (r5.Equal(&expected) && r16.Equal(&expected)) + }, + genScalar, + )) + } else { + + properties.Property("[G2] Multi exponentation (c=4) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 4, false, runtime.NumCPU()) + result.msmC4(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=5) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 5, false, runtime.NumCPU()) + result.msmC5(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=6) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 6, false, runtime.NumCPU()) + result.msmC6(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=7) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 7, false, runtime.NumCPU()) + result.msmC7(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=8) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 8, false, runtime.NumCPU()) + result.msmC8(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=9) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 9, false, runtime.NumCPU()) + result.msmC9(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=10) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 10, false, runtime.NumCPU()) + result.msmC10(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=11) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 11, false, runtime.NumCPU()) + result.msmC11(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=12) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 12, false, runtime.NumCPU()) + result.msmC12(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=13) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 13, false, runtime.NumCPU()) + result.msmC13(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=14) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 14, false, runtime.NumCPU()) + result.msmC14(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=15) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 15, false, runtime.NumCPU()) + result.msmC15(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=16) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 16, false, runtime.NumCPU()) + result.msmC16(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=20) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 20, false, runtime.NumCPU()) + result.msmC20(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=21) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 21, false, runtime.NumCPU()) + result.msmC21(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + properties.Property("[G2] Multi exponentation (c=22) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var result, expected G2Jac + + // mixer ensures that all the words of a fpElement are set + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + } + + scalars, _ := partitionScalars(sampleScalars[:], 22, false, runtime.NumCPU()) + result.msmC22(samplePoints[:], scalars, false) + + // compute expected result with double and add + var finalScalar, mixerBigInt big.Int + finalScalar.Mul(&scalar, mixer.ToBigIntRegular(&mixerBigInt)) + expected.ScalarMultiplication(&g2Gen, &finalScalar) + + return result.Equal(&expected) + }, + genScalar, + )) + + } + + // note : this test is here as we expect to have a different multiExp than the above bucket method + // for small number of points + properties.Property("[G2] Multi exponentation (<50points) should be consistant with sum of square", prop.ForAll( + func(mixer fr.Element) bool { + + var g G2Jac + g.Set(&g2Gen) + + // mixer ensures that all the words of a fpElement are set + samplePoints := make([]G2Affine, 30) + sampleScalars := make([]fr.Element, 30) + + for i := 1; i <= 30; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1].FromJacobian(&g) + g.AddAssign(&g2Gen) + } + + var op1MultiExp G2Affine + op1MultiExp.MultiExp(samplePoints, sampleScalars, ecc.MultiExpConfig{}) + + var finalBigScalar fr.Element + var finalBigScalarBi big.Int + var op1ScalarMul G2Affine + finalBigScalar.SetString("9455").Mul(&finalBigScalar, &mixer) + finalBigScalar.ToBigIntRegular(&finalBigScalarBi) + op1ScalarMul.ScalarMultiplication(&g2GenAff, &finalBigScalarBi) + + return op1ScalarMul.Equal(&op1MultiExp) + }, + genScalar, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func BenchmarkMultiExpG2(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const pow = (bits.UintSize / 2) - (bits.UintSize / 8) // 24 on 64 bits arch, 12 on 32 bits + const nbSamples = 1 << pow + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + 
for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + for i := 5; i <= pow; i++ { + using := 1 << i + + b.Run(fmt.Sprintf("%d points", using), func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:using], sampleScalars[:using], ecc.MultiExpConfig{}) + } + }) + } +} + +func BenchmarkMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). + FromMont() + samplePoints[i-1] = g2GenAff + } + + var testPoint G2Affine + + b.ResetTimer() + for j := 0; j < b.N; j++ { + testPoint.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + } +} + +func BenchmarkManyMultiExpG2Reference(b *testing.B) { + // ensure every words of the scalars are filled + var mixer fr.Element + mixer.SetString("7716837800905789770901243404444209691916730933998574719964609384059111546487") + + const nbSamples = 1 << 20 + + var samplePoints [nbSamples]G2Affine + var sampleScalars [nbSamples]fr.Element + + for i := 1; i <= nbSamples; i++ { + sampleScalars[i-1].SetUint64(uint64(i)). + Mul(&sampleScalars[i-1], &mixer). 
+ FromMont() + samplePoints[i-1] = g2GenAff + } + + var t1, t2, t3 G2Affine + b.ResetTimer() + for j := 0; j < b.N; j++ { + var wg sync.WaitGroup + wg.Add(3) + go func() { + t1.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t2.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + go func() { + t3.MultiExp(samplePoints[:], sampleScalars[:], ecc.MultiExpConfig{}) + wg.Done() + }() + wg.Wait() + } +} diff --git a/ecc/bls12-378/pairing.go b/ecc/bls12-378/pairing.go new file mode 100644 index 000000000..08ed3775e --- /dev/null +++ b/ecc/bls12-378/pairing.go @@ -0,0 +1,241 @@ +// Copyright 2020 ConsenSys AG +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package bls12378 + +import ( + "errors" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/internal/fptower" +) + +// GT target group of the pairing +type GT = fptower.E12 + +type lineEvaluation struct { + r0 fptower.E2 + r1 fptower.E2 + r2 fptower.E2 +} + +// Pair calculates the reduced pairing for a set of points +func Pair(P []G1Affine, Q []G2Affine) (GT, error) { + f, err := MillerLoop(P, Q) + if err != nil { + return GT{}, err + } + return FinalExponentiation(&f), nil +} + +// PairingCheck calculates the reduced pairing for a set of points and returns True if the result is One +func PairingCheck(P []G1Affine, Q []G2Affine) (bool, error) { + f, err := Pair(P, Q) + if err != nil { + return false, err + } + var one GT + one.SetOne() + return f.Equal(&one), nil +} + +// FinalExponentiation computes the final expo x**(p**6-1)(p**2+1)(p**4 - p**2 +1)/r +func FinalExponentiation(z *GT, _z ...*GT) GT { + var result GT + result.Set(z) + + for _, e := range _z { + result.Mul(&result, e) + } + + // https://eprint.iacr.org/2016/130.pdf + var t [3]GT + + // easy part + t[0].Conjugate(&result) + result.Inverse(&result) + t[0].Mul(&t[0], &result) + result.FrobeniusSquare(&t[0]). 
+ Mul(&result, &t[0]) + + // hard part (up to permutation) + // Daiki Hayashida and Kenichiro Hayasaka + // and Tadanori Teruya + // https://eprint.iacr.org/2020/875.pdf + t[0].CyclotomicSquare(&result) + t[1].Expt(&result) + t[2].InverseUnitary(&result) + t[1].Mul(&t[1], &t[2]) + t[2].Expt(&t[1]) + t[1].InverseUnitary(&t[1]) + t[1].Mul(&t[1], &t[2]) + t[2].Expt(&t[1]) + t[1].Frobenius(&t[1]) + t[1].Mul(&t[1], &t[2]) + result.Mul(&result, &t[0]) + t[0].Expt(&t[1]) + t[2].Expt(&t[0]) + t[0].FrobeniusSquare(&t[1]) + t[1].InverseUnitary(&t[1]) + t[1].Mul(&t[1], &t[2]) + t[1].Mul(&t[1], &t[0]) + result.Mul(&result, &t[1]) + + return result +} + +// MillerLoop Miller loop +func MillerLoop(P []G1Affine, Q []G2Affine) (GT, error) { + // check input size match + n := len(P) + if n == 0 || n != len(Q) { + return GT{}, errors.New("invalid inputs sizes") + } + + // filter infinity points + p := make([]G1Affine, 0, n) + q := make([]G2Affine, 0, n) + + for k := 0; k < n; k++ { + if P[k].IsInfinity() || Q[k].IsInfinity() { + continue + } + p = append(p, P[k]) + q = append(q, Q[k]) + } + + n = len(p) + + // projective points for Q + qProj := make([]g2Proj, n) + for k := 0; k < n; k++ { + qProj[k].FromAffine(&q[k]) + } + + var result, lines GT + result.SetOne() + + var l1, l2 lineEvaluation + + // i == 62 + for k := 0; k < n; k++ { + qProj[k].DoubleStep(&l1) + // line eval + l1.r1.MulByElement(&l1.r1, &p[k].X) + l1.r2.MulByElement(&l1.r2, &p[k].Y) + result.MulBy014(&l1.r0, &l1.r1, &l1.r2) + } + + for i := 61; i >= 0; i-- { + result.Square(&result) + + for k := 0; k < n; k++ { + qProj[k].DoubleStep(&l1) + // line eval + l1.r1.MulByElement(&l1.r1, &p[k].X) + l1.r2.MulByElement(&l1.r2, &p[k].Y) + + if loopCounter[i] == 0 { + result.MulBy014(&l1.r0, &l1.r1, &l1.r2) + } else { + qProj[k].AddMixedStep(&l2, &q[k]) + // line eval + l2.r1.MulByElement(&l2.r1, &p[k].X) + l2.r2.MulByElement(&l2.r2, &p[k].Y) + lines.Mul014By014(&l1.r0, &l1.r1, &l1.r2, &l2.r0, &l2.r1, &l2.r2) + 
result.Mul(&result, &lines) + } + } + } + + return result, nil +} + +// DoubleStep doubles a point in Homogenous projective coordinates, and evaluates the line in Miller loop +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g2Proj) DoubleStep(l *lineEvaluation) { + + // get some Element from our pool + var t1, A, B, C, D, E, EE, F, G, H, I, J, K fptower.E2 + A.Mul(&p.x, &p.y) + A.Halve() + B.Square(&p.y) + C.Square(&p.z) + D.Double(&C). + Add(&D, &C) + E.Mul(&D, &bTwistCurveCoeff) + F.Double(&E). + Add(&F, &E) + G.Add(&B, &F) + G.Halve() + H.Add(&p.y, &p.z). + Square(&H) + t1.Add(&B, &C) + H.Sub(&H, &t1) + I.Sub(&E, &B) + J.Square(&p.x) + EE.Square(&E) + K.Double(&EE). + Add(&K, &EE) + + // X, Y, Z + p.x.Sub(&B, &F). + Mul(&p.x, &A) + p.y.Square(&G). + Sub(&p.y, &K) + p.z.Mul(&B, &H) + + // Line evaluation + l.r0.Set(&I) + l.r1.Double(&J). + Add(&l.r1, &J) + l.r2.Neg(&H) + +} + +// AddMixedStep point addition in Mixed Homogenous projective and Affine coordinates +// https://eprint.iacr.org/2013/722.pdf (Section 4.3) +func (p *g2Proj) AddMixedStep(l *lineEvaluation, a *G2Affine) { + + // get some Element from our pool + var Y2Z1, X2Z1, O, L, C, D, E, F, G, H, t0, t1, t2, J fptower.E2 + Y2Z1.Mul(&a.Y, &p.z) + O.Sub(&p.y, &Y2Z1) + X2Z1.Mul(&a.X, &p.z) + L.Sub(&p.x, &X2Z1) + C.Square(&O) + D.Square(&L) + E.Mul(&L, &D) + F.Mul(&p.z, &C) + G.Mul(&p.x, &D) + t0.Double(&G) + H.Add(&E, &F). + Sub(&H, &t0) + t1.Mul(&p.y, &E) + + // X, Y, Z + p.x.Mul(&L, &H) + p.y.Sub(&G, &H). + Mul(&p.y, &O). + Sub(&p.y, &t1) + p.z.Mul(&E, &p.z) + + t2.Mul(&L, &a.Y) + J.Mul(&a.X, &O). + Sub(&J, &t2) + + // Line evaluation + l.r0.Set(&J) + l.r1.Neg(&O) + l.r2.Set(&L) +} diff --git a/ecc/bls12-378/pairing_test.go b/ecc/bls12-378/pairing_test.go new file mode 100644 index 000000000..5dd1db441 --- /dev/null +++ b/ecc/bls12-378/pairing_test.go @@ -0,0 +1,305 @@ +// Copyright 2020 ConsenSys Software Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "fmt" + "math/big" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestPairing(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + genA := GenE12() + genR1 := GenFr() + genR2 := GenFr() + + properties.Property("[BLS12-378] Having the receiver as operand (final expo) should output the same result", prop.ForAll( + func(a GT) bool { + b := a + b = FinalExponentiation(&a) + a = FinalExponentiation(&a) + return a.Equal(&b) + }, + genA, + )) + + properties.Property("[BLS12-378] Exponentiating FinalExpo(a) to r should output 1", prop.ForAll( + func(a GT) bool { + b := FinalExponentiation(&a) + return !a.IsInSubGroup() && b.IsInSubGroup() + }, + genA, + )) + + properties.Property("[BLS12-378] Expt(Expt) and Exp(t^2) should output the same result in the cyclotomic subgroup", prop.ForAll( + func(a GT) bool { + var b, c, d GT + b.Conjugate(&a) + a.Inverse(&a) + b.Mul(&b, &a) + + a.FrobeniusSquare(&b). 
+ Mul(&a, &b) + + c.Expt(&a).Expt(&c) + d.Exp(&a, xGen).Exp(&d, xGen) + return c.Equal(&d) + }, + genA, + )) + + properties.Property("[BLS12-378] bilinearity", prop.ForAll( + func(a, b fr.Element) bool { + + var res, resa, resb, resab, zero GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint, ab big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + ab.Mul(&abigint, &bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + res, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + resa, _ = Pair([]G1Affine{ag1}, []G2Affine{g2GenAff}) + resb, _ = Pair([]G1Affine{g1GenAff}, []G2Affine{bg2}) + + resab.Exp(&res, ab) + resa.Exp(&resa, bbigint) + resb.Exp(&resb, abigint) + + return resab.Equal(&resa) && resab.Equal(&resb) && !res.Equal(&zero) + + }, + genR1, + genR2, + )) + + properties.Property("[BLS12-378] MillerLoop of pairs should be equal to the product of MillerLoops", prop.ForAll( + func(a, b fr.Element) bool { + + var simpleProd, factorizedProd GT + + var ag1 G1Affine + var bg2 G2Affine + + var abigint, bbigint big.Int + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + P0 := []G1Affine{g1GenAff} + P1 := []G1Affine{ag1} + Q0 := []G2Affine{g2GenAff} + Q1 := []G2Affine{bg2} + + // FE( ML(a,b) * ML(c,d) * ML(e,f) * ML(g,h) ) + M1, _ := MillerLoop(P0, Q0) + M2, _ := MillerLoop(P1, Q0) + M3, _ := MillerLoop(P0, Q1) + M4, _ := MillerLoop(P1, Q1) + simpleProd.Mul(&M1, &M2).Mul(&simpleProd, &M3).Mul(&simpleProd, &M4) + simpleProd = FinalExponentiation(&simpleProd) + + tabP := []G1Affine{g1GenAff, ag1, g1GenAff, ag1} + tabQ := []G2Affine{g2GenAff, g2GenAff, bg2, bg2} + + // FE( ML([a,c,e,g] ; [b,d,f,h]) ) -> saves 3 squares in Fqk + factorizedProd, _ = Pair(tabP, tabQ) + + return simpleProd.Equal(&factorizedProd) + }, + genR1, + genR2, + )) + + 
properties.Property("[BLS12-378] PairingCheck", prop.ForAll( + func(a, b fr.Element) bool { + + var g1GenAffNeg G1Affine + g1GenAffNeg.Neg(&g1GenAff) + tabP := []G1Affine{g1GenAff, g1GenAffNeg} + tabQ := []G2Affine{g2GenAff, g2GenAff} + + res, _ := PairingCheck(tabP, tabQ) + + return res + }, + genR1, + genR2, + )) + + properties.Property("[BLS12-378] MillerLoop should skip pairs with a point at infinity", prop.ForAll( + func(a, b fr.Element) bool { + + var one GT + + var ag1, g1Inf G1Affine + var bg2, g2Inf G2Affine + + var abigint, bbigint big.Int + + one.SetOne() + + a.ToBigIntRegular(&abigint) + b.ToBigIntRegular(&bbigint) + + ag1.ScalarMultiplication(&g1GenAff, &abigint) + bg2.ScalarMultiplication(&g2GenAff, &bbigint) + + g1Inf.FromJacobian(&g1Infinity) + g2Inf.FromJacobian(&g2Infinity) + + // e([0,c] ; [b,d]) + tabP := []G1Affine{g1Inf, ag1} + tabQ := []G2Affine{g2GenAff, bg2} + res1, _ := Pair(tabP, tabQ) + + // e([a,c] ; [0,d]) + tabP = []G1Affine{g1GenAff, ag1} + tabQ = []G2Affine{g2Inf, bg2} + res2, _ := Pair(tabP, tabQ) + + // e([0,c] ; [d,0]) + tabP = []G1Affine{g1Inf, ag1} + tabQ = []G2Affine{bg2, g2Inf} + res3, _ := Pair(tabP, tabQ) + + return res1.Equal(&res2) && !res2.Equal(&res3) && res3.Equal(&one) + }, + genR1, + genR2, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkPairing(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkMillerLoop(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop([]G1Affine{g1GenAff}, []G2Affine{g2GenAff}) + } +} + +func BenchmarkFinalExponentiation(b *testing.B) { + + 
var a GT + a.SetRandom() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + FinalExponentiation(&a) + } + +} + +func BenchmarkMultiMiller(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + MillerLoop(P, Q) + } + }) + } +} + +func BenchmarkMultiPair(b *testing.B) { + + var g1GenAff G1Affine + var g2GenAff G2Affine + + g1GenAff.FromJacobian(&g1Gen) + g2GenAff.FromJacobian(&g2Gen) + + n := 10 + P := make([]G1Affine, n) + Q := make([]G2Affine, n) + + for i := 2; i <= n; i++ { + for j := 0; j < i; j++ { + P[j].Set(&g1GenAff) + Q[j].Set(&g2GenAff) + } + b.Run(fmt.Sprintf("%d pairs", i), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + Pair(P, Q) + } + }) + } +} diff --git a/ecc/bls12-378/twistededwards/doc.go b/ecc/bls12-378/twistededwards/doc.go new file mode 100644 index 000000000..584dc49da --- /dev/null +++ b/ecc/bls12-378/twistededwards/doc.go @@ -0,0 +1,18 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package twistededwards provides bls12-378's twisted edwards "companion curve" defined on fr. +package twistededwards diff --git a/ecc/bls12-378/twistededwards/eddsa/doc.go b/ecc/bls12-378/twistededwards/eddsa/doc.go new file mode 100644 index 000000000..e19c483f7 --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/doc.go @@ -0,0 +1,22 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +// Package eddsa provides EdDSA signature scheme on bls12-378's twisted edwards curve. +// +// See also +// +// https://en.wikipedia.org/wiki/EdDSA +package eddsa diff --git a/ecc/bls12-378/twistededwards/eddsa/eddsa.go b/ecc/bls12-378/twistededwards/eddsa/eddsa.go new file mode 100644 index 000000000..00f78b442 --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/eddsa.go @@ -0,0 +1,265 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "errors" + "hash" + "io" + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/ecc/bls12-378/twistededwards" + "github.com/consensys/gnark-crypto/signature" + "golang.org/x/crypto/blake2b" +) + +var errNotOnCurve = errors.New("point not on curve") + +const ( + sizeFr = fr.Bytes + sizePublicKey = sizeFr + sizeSignature = 2 * sizeFr + sizePrivateKey = 2*sizeFr + 32 +) + +// PublicKey eddsa signature object +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type PublicKey struct { + A twistededwards.PointAffine +} + +// PrivateKey private key of an eddsa instance +type PrivateKey struct { + PublicKey PublicKey // copy of the associated public key + scalar [sizeFr]byte // secret scalar, in big Endian + randSrc [32]byte // source +} + +// Signature represents an eddsa signature +// cf https://en.wikipedia.org/wiki/EdDSA for notation +type Signature struct { + R twistededwards.PointAffine + S [sizeFr]byte +} + +func init() { + signature.Register(signature.EDDSA_BLS12_378, GenerateKeyInterfaces) +} + +// GenerateKey generates a public and private key pair. 
+func GenerateKey(r io.Reader) (PrivateKey, error) {
+
+	c := twistededwards.GetEdwardsCurve()
+
+	var pub PublicKey
+	var priv PrivateKey
+
+	// hash(h) = private_key || random_source, on 32 bytes each
+	// (seed is expanded with blake2b into 64 bytes: first half becomes the
+	// secret scalar after pruning, second half seeds the deterministic nonce)
+	seed := make([]byte, 32)
+	_, err := r.Read(seed)
+	if err != nil {
+		return priv, err
+	}
+	h := blake2b.Sum512(seed[:])
+	for i := 0; i < 32; i++ {
+		priv.randSrc[i] = h[i+32]
+	}
+
+	// prune the key
+	// https://tools.ietf.org/html/rfc8032#section-5.1.5, key generation
+	// clear the low 3 bits (cofactor) and force the high bit pattern
+
+	h[0] &= 0xF8
+	h[31] &= 0x7F
+	h[31] |= 0x40
+
+	// reverse first bytes because setBytes interpret stream as big endian
+	// but in eddsa specs s is the first 32 bytes in little endian
+	// NOTE(review): j starts at sizeFr (not sizeFr-1), so h[sizeFr] — the first
+	// byte already copied into randSrc above — also takes part in the swap;
+	// confirm this matches the generator/templates for the other curves
+	for i, j := 0, sizeFr; i < j; i, j = i+1, j-1 {
+
+		h[i], h[j] = h[j], h[i]
+
+	}
+
+	copy(priv.scalar[:], h[:sizeFr])
+
+	// public key A = scalar * Base (scalar interpreted as a big-endian integer)
+	var bscalar big.Int
+	bscalar.SetBytes(priv.scalar[:])
+	pub.A.ScalarMul(&c.Base, &bscalar)
+
+	priv.PublicKey = pub
+
+	return priv, nil
+}
+
+// GenerateKeyInterfaces generate interfaces for the public/private key.
+// This purpose of this function is to be registered in the list of signature schemes.
+func GenerateKeyInterfaces(r io.Reader) (signature.Signer, error) {
+	priv, err := GenerateKey(r)
+	return &priv, err
+}
+
+// Equal compares 2 public keys
+// (constant-time byte comparison of the serialized forms)
+func (pub *PublicKey) Equal(other signature.PublicKey) bool {
+	bpk := pub.Bytes()
+	bother := other.Bytes()
+	return subtle.ConstantTimeCompare(bpk, bother) == 1
+}
+
+// Public returns the public key associated to the private key.
+// From Signer interface defined in gnark/crypto/signature.
+func (privKey *PrivateKey) Public() signature.PublicKey { + var pub PublicKey + pub.A.Set(&privKey.PublicKey.A) + return &pub +} + +// Sign sign a message +// Pure Eddsa version (see https://tools.ietf.org/html/rfc8032#page-8) +func (privKey *PrivateKey) Sign(message []byte, hFunc hash.Hash) ([]byte, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + var res Signature + + // blinding factor for the private key + // blindingFactorBigInt must be the same size as the private key, + // blindingFactorBigInt = h(randomness_source||message)[:sizeFr] + var blindingFactorBigInt big.Int + + // randSrc = privKey.randSrc || msg (-> message = MSB message .. LSB message) + randSrc := make([]byte, 32+len(message)) + for i, v := range privKey.randSrc { + randSrc[i] = v + } + copy(randSrc[32:], message) + + // randBytes = H(randSrc) + blindingFactorBytes := blake2b.Sum512(randSrc[:]) // TODO ensures that the hash used to build the key and the one used here is the same + blindingFactorBigInt.SetBytes(blindingFactorBytes[:sizeFr]) + + // compute R = randScalar*Base + res.R.ScalarMul(&curveParams.Base, &blindingFactorBigInt) + if !res.R.IsOnCurve() { + return nil, errNotOnCurve + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + resRX := res.R.X.Bytes() + resRY := res.R.Y.Bytes() + resAX := privKey.PublicKey.A.X.Bytes() + resAY := privKey.PublicKey.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], resRX[:]) + copy(dataToHash[sizeFr:], resRY[:]) + copy(dataToHash[2*sizeFr:], resAX[:]) + copy(dataToHash[3*sizeFr:], resAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + _, err := hFunc.Write(dataToHash[:]) + if err != nil { + return nil, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // Compute s = randScalarInt + H(R,A,M)*S + // going with big int to do ops mod curve order + var bscalar, bs big.Int + 
bscalar.SetBytes(privKey.scalar[:]) + bs.Mul(&hramInt, &bscalar). + Add(&bs, &blindingFactorBigInt). + Mod(&bs, &curveParams.Order) + sb := bs.Bytes() + if len(sb) < sizeFr { + offset := make([]byte, sizeFr-len(sb)) + sb = append(offset, sb...) + } + copy(res.S[:], sb[:]) + + return res.Bytes(), nil +} + +// Verify verifies an eddsa signature +func (pub *PublicKey) Verify(sigBin, message []byte, hFunc hash.Hash) (bool, error) { + + curveParams := twistededwards.GetEdwardsCurve() + + // verify that pubKey and R are on the curve + if !pub.A.IsOnCurve() { + return false, errNotOnCurve + } + + // Deserialize the signature + var sig Signature + if _, err := sig.SetBytes(sigBin); err != nil { + return false, err + } + + // compute H(R, A, M), all parameters in data are in Montgomery form + sigRX := sig.R.X.Bytes() + sigRY := sig.R.Y.Bytes() + sigAX := pub.A.X.Bytes() + sigAY := pub.A.Y.Bytes() + sizeDataToHash := 4*sizeFr + len(message) + dataToHash := make([]byte, sizeDataToHash) + copy(dataToHash[:], sigRX[:]) + copy(dataToHash[sizeFr:], sigRY[:]) + copy(dataToHash[2*sizeFr:], sigAX[:]) + copy(dataToHash[3*sizeFr:], sigAY[:]) + copy(dataToHash[4*sizeFr:], message) + hFunc.Reset() + if _, err := hFunc.Write(dataToHash[:]); err != nil { + return false, err + } + + var hramInt big.Int + hramBin := hFunc.Sum(nil) + hramInt.SetBytes(hramBin) + + // lhs = cofactor*S*Base + var lhs twistededwards.PointAffine + var bCofactor, bs big.Int + curveParams.Cofactor.ToBigInt(&bCofactor) + bs.SetBytes(sig.S[:]) + lhs.ScalarMul(&curveParams.Base, &bs). + ScalarMul(&lhs, &bCofactor) + + if !lhs.IsOnCurve() { + return false, errNotOnCurve + } + + // rhs = cofactor*(R + H(R,A,M)*A) + var rhs twistededwards.PointAffine + rhs.ScalarMul(&pub.A, &hramInt). + Add(&rhs, &sig.R). 
+ ScalarMul(&rhs, &bCofactor) + if !rhs.IsOnCurve() { + return false, errNotOnCurve + } + + // verifies that cofactor*S*Base=cofactor*(R + H(R,A,M)*A) + if !lhs.X.Equal(&rhs.X) || !lhs.Y.Equal(&rhs.Y) { + return false, nil + } + + return true, nil +} diff --git a/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go b/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go new file mode 100644 index 000000000..b46dec00c --- /dev/null +++ b/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go @@ -0,0 +1,208 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/sha256" + "math/rand" + "testing" + + crand "crypto/rand" + + "fmt" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/consensys/gnark-crypto/hash" + "github.com/consensys/gnark-crypto/signature" +) + +func Example() { + // instantiate hash function + hFunc := hash.MIMC_BLS12_378.New("seed") + + // create a eddsa key pair + privateKey, _ := signature.EDDSA_BLS12_378.New(crand.Reader) + publicKey := privateKey.Public() + + // note that the message is on 4 bytes + msg := []byte{0xde, 0xad, 0xf0, 0x0d} + + // sign the message + signature, _ := privateKey.Sign(msg, hFunc) + + // verifies signature + isValid, _ := publicKey.Verify(signature, msg, hFunc) + if !isValid { + fmt.Println("1. invalid signature") + } else { + fmt.Println("1. 
valid signature")
+	}
+
+	// Output: 1. valid signature
+}
+
+// TestSerialization checks that serialize(deserialize(.)) is the identity
+// for both public and private keys.
+func TestSerialization(t *testing.T) {
+
+	src := rand.NewSource(0)
+	r := rand.New(src)
+
+	privKey1, err := signature.EDDSA_BLS12_378.New(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pubKey1 := privKey1.Public()
+
+	privKey2, err := signature.EDDSA_BLS12_378.New(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pubKey2 := privKey2.Public()
+
+	pubKeyBin1 := pubKey1.Bytes()
+	pubKey2.SetBytes(pubKeyBin1)
+	pubKeyBin2 := pubKey2.Bytes()
+	if len(pubKeyBin1) != len(pubKeyBin2) {
+		t.Fatal("Inconsistent size")
+	}
+	for i := 0; i < len(pubKeyBin1); i++ {
+		if pubKeyBin1[i] != pubKeyBin2[i] {
+			t.Fatal("Error serialize(deserialize(.))")
+		}
+	}
+
+	privKeyBin1 := privKey1.Bytes()
+	privKey2.SetBytes(privKeyBin1)
+	privKeyBin2 := privKey2.Bytes()
+	if len(privKeyBin1) != len(privKeyBin2) {
+		t.Fatal("Inconsistent size")
+	}
+	for i := 0; i < len(privKeyBin1); i++ {
+		if privKeyBin1[i] != privKeyBin2[i] {
+			t.Fatal("Error serialize(deserialize(.))")
+		}
+	}
+}
+
+// TestEddsaMIMC signs/verifies a field-element message with the MiMC hash.
+func TestEddsaMIMC(t *testing.T) {
+
+	src := rand.NewSource(0)
+	r := rand.New(src)
+
+	// create eddsa obj and sign a message
+	privKey, err := signature.EDDSA_BLS12_378.New(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pubKey := privKey.Public()
+	hFunc := hash.MIMC_BLS12_378.New("seed")
+
+	var frMsg fr.Element
+	frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978")
+	msgBin := frMsg.Bytes()
+	signature, err := privKey.Sign(msgBin[:], hFunc)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// verifies correct msg
+	res, err := pubKey.Verify(signature, msgBin[:], hFunc)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !res {
+		t.Fatal("Verify correct signature should return true")
+	}
+
+	// verifies wrong msg
+	frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035979")
+	msgBin = frMsg.Bytes()
+	res, err = pubKey.Verify(signature, msgBin[:], hFunc)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if res {
+		t.Fatal("Verify wrong signature should be false")
+	}
+
+}
+
+// TestEddsaSHA256 signs/verifies an arbitrary byte message with SHA-256.
+func TestEddsaSHA256(t *testing.T) {
+
+	src := rand.NewSource(0)
+	r := rand.New(src)
+
+	hFunc := sha256.New()
+
+	// create eddsa obj and sign a message
+	// (check the error before dereferencing the key)
+
+	privKey, err := signature.EDDSA_BLS12_378.New(r)
+	if err != nil {
+		t.Fatal(err)
+	}
+	pubKey := privKey.Public()
+
+	signature, err := privKey.Sign([]byte("message"), hFunc)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// verifies correct msg
+	res, err := pubKey.Verify(signature, []byte("message"), hFunc)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !res {
+		t.Fatal("Verify correct signature should return true")
+	}
+
+	// verifies wrong msg
+	res, err = pubKey.Verify(signature, []byte("wrong_message"), hFunc)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if res {
+		t.Fatal("Verify wrong signature should be false")
+	}
+
+}
+
+// benchmarks
+
+func BenchmarkVerify(b *testing.B) {
+
+	src := rand.NewSource(0)
+	r := rand.New(src)
+
+	hFunc := hash.MIMC_BLS12_378.New("seed")
+
+	// create eddsa obj and sign a message
+	privKey, err := signature.EDDSA_BLS12_378.New(r)
+	if err != nil {
+		b.Fatal(err)
+	}
+	pubKey := privKey.Public()
+	var frMsg fr.Element
+	frMsg.SetString("44717650746155748460101257525078853138837311576962212923649547644148297035978")
+	msgBin := frMsg.Bytes()
+	signature, _ := privKey.Sign(msgBin[:], hFunc)
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		pubKey.Verify(signature, msgBin[:], hFunc)
+	}
+}
diff --git a/ecc/bls12-378/twistededwards/eddsa/marshal.go b/ecc/bls12-378/twistededwards/eddsa/marshal.go
new file mode 100644
index 000000000..c68129087
--- /dev/null
+++ b/ecc/bls12-378/twistededwards/eddsa/marshal.go
@@ -0,0 +1,133 @@
+// Copyright 2020 ConsenSys Software Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package eddsa + +import ( + "crypto/subtle" + "io" +) + +// Bytes returns the binary representation of the public key +// follows https://tools.ietf.org/html/rfc8032#section-3.1 +// and returns a compressed representation of the point (x,y) +// +// x, y are the coordinates of the point +// on the twisted Edwards as big endian integers. +// compressed representation store x with a parity bit to recompute y +func (pk *PublicKey) Bytes() []byte { + var res [sizePublicKey]byte + pkBin := pk.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pkBin[:]) + return res[:] +} + +// SetBytes sets p from binary representation in buf. +// buf represents a public key as x||y where x, y are +// interpreted as big endian binary numbers corresponding +// to the coordinates of a point on the twisted Edwards. +// It returns the number of bytes read from the buffer. +func (pk *PublicKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePublicKey { + return n, io.ErrShortBuffer + } + if _, err := pk.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !pk.A.IsOnCurve() { + return n, errNotOnCurve + } + return n, nil +} + +// Bytes returns the binary representation of pk, +// as byte array publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. 
+func (privKey *PrivateKey) Bytes() []byte { + var res [sizePrivateKey]byte + pubkBin := privKey.PublicKey.A.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], pubkBin[:]) + subtle.ConstantTimeCopy(1, res[sizeFr:2*sizeFr], privKey.scalar[:]) + subtle.ConstantTimeCopy(1, res[2*sizeFr:], privKey.randSrc[:]) + return res[:] +} + +// SetBytes sets pk from buf, where buf is interpreted +// as publicKey||scalar||randSrc +// where publicKey is as publicKey.Bytes(), and +// scalar is in big endian, of size sizeFr. +// It returns the number byte read. +func (privKey *PrivateKey) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizePrivateKey { + return n, io.ErrShortBuffer + } + if _, err := privKey.PublicKey.A.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !privKey.PublicKey.A.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, privKey.scalar[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + subtle.ConstantTimeCopy(1, privKey.randSrc[:], buf[2*sizeFr:]) + n += sizeFr + return n, nil +} + +// Bytes returns the binary representation of sig +// as a byte array of size 3*sizeFr x||y||s where +// * x, y are the coordinates of a point on the twisted +// Edwards represented in big endian +// * s=r+h(r,a,m) mod l, the Hasse bound guarantess that +// s is smaller than sizeFr (in particular it is supposed +// s is NOT blinded) +func (sig *Signature) Bytes() []byte { + var res [sizeSignature]byte + sigRBin := sig.R.Bytes() + subtle.ConstantTimeCopy(1, res[:sizeFr], sigRBin[:]) + subtle.ConstantTimeCopy(1, res[sizeFr:], sig.S[:]) + return res[:] +} + +// SetBytes sets sig from a buffer in binary. +// buf is read interpreted as x||y||s where +// * x,y are the coordinates of a point on the twisted +// Edwards represented in big endian +// * s=r+h(r,a,m) mod l, the Hasse bound guarantess that +// s is smaller than sizeFr (in particular it is supposed +// s is NOT blinded) +// It returns the number of bytes read from buf. 
+func (sig *Signature) SetBytes(buf []byte) (int, error) { + n := 0 + if len(buf) < sizeSignature { + return n, io.ErrShortBuffer + } + if _, err := sig.R.SetBytes(buf[:sizeFr]); err != nil { + return 0, err + } + n += sizeFr + if !sig.R.IsOnCurve() { + return n, errNotOnCurve + } + subtle.ConstantTimeCopy(1, sig.S[:], buf[sizeFr:2*sizeFr]) + n += sizeFr + return n, nil +} diff --git a/ecc/bls12-378/twistededwards/point.go b/ecc/bls12-378/twistededwards/point.go new file mode 100644 index 000000000..8f6c45f7a --- /dev/null +++ b/ecc/bls12-378/twistededwards/point.go @@ -0,0 +1,411 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "crypto/subtle" + "io" + "math/big" + "math/bits" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// PointAffine point on a twisted Edwards curve +type PointAffine struct { + X, Y fr.Element +} + +// PointProj point in projective coordinates +type PointProj struct { + X, Y, Z fr.Element +} + +const ( + //following https://tools.ietf.org/html/rfc8032#section-3.1, + // an fr element x is negative if its binary encoding is + // lexicographically larger than -x. 
+ mCompressedNegative = 0x80 + mCompressedPositive = 0x00 + mUnmask = 0x7f + + // size in byte of a compressed point (point.Y --> fr.Element) + sizePointCompressed = fr.Limbs * 8 +) + +// Bytes returns the compressed point as a byte array +// Follows https://tools.ietf.org/html/rfc8032#section-3.1, +// as the twisted Edwards implementation is primarily used +// for eddsa. +func (p *PointAffine) Bytes() [sizePointCompressed]byte { + + var res [sizePointCompressed]byte + var mask uint + + y := p.Y.Bytes() + + if p.X.LexicographicallyLargest() { + mask = mCompressedNegative + } else { + mask = mCompressedPositive + } + // p.Y must be in little endian + y[0] |= byte(mask) // msb of y + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + y[i], y[j] = y[j], y[i] + } + subtle.ConstantTimeCopy(1, res[:], y[:]) + return res +} + +// Marshal converts p to a byte slice +func (p *PointAffine) Marshal() []byte { + b := p.Bytes() + return b[:] +} + +func computeX(y *fr.Element) (x fr.Element) { + var one, num, den fr.Element + one.SetOne() + num.Square(y) + den.Mul(&num, &edwards.D) + num.Sub(&one, &num) + den.Sub(&edwards.A, &den) + x.Div(&num, &den) + x.Sqrt(&x) + return +} + +// SetBytes sets p from buf +// len(buf) >= sizePointCompressed +// buf contains the Y coordinate masked with a parity bit to recompute the X coordinate +// from the curve equation. See Bytes() and https://tools.ietf.org/html/rfc8032#section-3.1 +// Returns the number of read bytes and an error if the buffer is too short. 
+func (p *PointAffine) SetBytes(buf []byte) (int, error) { + + if len(buf) < sizePointCompressed { + return 0, io.ErrShortBuffer + } + bufCopy := make([]byte, sizePointCompressed) + subtle.ConstantTimeCopy(1, bufCopy, buf[:sizePointCompressed]) + for i, j := 0, sizePointCompressed-1; i < j; i, j = i+1, j-1 { + bufCopy[i], bufCopy[j] = bufCopy[j], bufCopy[i] + } + isLexicographicallyLargest := (mCompressedNegative&bufCopy[0])>>7 == 1 + bufCopy[0] &= mUnmask + p.Y.SetBytes(bufCopy) + p.X = computeX(&p.Y) + if isLexicographicallyLargest { + if !p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } else { + if p.X.LexicographicallyLargest() { + p.X.Neg(&p.X) + } + } + + return sizePointCompressed, nil +} + +// Unmarshal alias to SetBytes() +func (p *PointAffine) Unmarshal(b []byte) error { + _, err := p.SetBytes(b) + return err +} + +// Set sets p to p1 and return it +func (p *PointProj) Set(p1 *PointProj) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.Set(&p1.Z) + return p +} + +// Set sets p to p1 and return it +func (p *PointAffine) Set(p1 *PointAffine) *PointAffine { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + return p +} + +// Equal returns true if p=p1 false otherwise +func (p *PointAffine) Equal(p1 *PointAffine) bool { + return p.X.Equal(&p1.X) && p.Y.Equal(&p1.Y) +} + +// Equal returns true if p=p1 false otherwise +// If one point is on the affine chart Z=0 it returns false +func (p *PointProj) Equal(p1 *PointProj) bool { + if p.Z.IsZero() || p1.Z.IsZero() { + return false + } + var pAffine, p1Affine PointAffine + pAffine.FromProj(p) + p1Affine.FromProj(p1) + return pAffine.Equal(&p1Affine) +} + +// NewPointAffine creates a new instance of PointAffine +func NewPointAffine(x, y fr.Element) PointAffine { + return PointAffine{x, y} +} + +// IsOnCurve checks if a point is on the twisted Edwards curve +func (p *PointAffine) IsOnCurve() bool { + + ecurve := GetEdwardsCurve() + + var lhs, rhs, tmp fr.Element + + tmp.Mul(&p.Y, &p.Y) + lhs.Mul(&p.X, &p.X) + 
mulByA(&lhs) + lhs.Add(&lhs, &tmp) + + tmp.Mul(&p.X, &p.X). + Mul(&tmp, &p.Y). + Mul(&tmp, &p.Y). + Mul(&tmp, &ecurve.D) + rhs.SetOne().Add(&rhs, &tmp) + + return lhs.Equal(&rhs) +} + +// Add adds two points (x,y), (u,v) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Add(p1, p2 *PointAffine) *PointAffine { + + ecurve := GetEdwardsCurve() + + var xu, yv, xv, yu, dxyuv, one, denx, deny fr.Element + pRes := new(PointAffine) + xv.Mul(&p1.X, &p2.Y) + yu.Mul(&p1.Y, &p2.X) + pRes.X.Add(&xv, &yu) + + xu.Mul(&p1.X, &p2.X) + mulByA(&xu) + yv.Mul(&p1.Y, &p2.Y) + pRes.Y.Sub(&yv, &xu) + + dxyuv.Mul(&xv, &yu).Mul(&dxyuv, &ecurve.D) + one.SetOne() + denx.Add(&one, &dxyuv) + deny.Sub(&one, &dxyuv) + + p.X.Div(&pRes.X, &denx) + p.Y.Div(&pRes.Y, &deny) + + return p +} + +// Double doubles point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointAffine) Double(p1 *PointAffine) *PointAffine { + + p.Set(p1) + var xx, yy, xy, denum, two fr.Element + + xx.Square(&p.X) + yy.Square(&p.Y) + xy.Mul(&p.X, &p.Y) + mulByA(&xx) + denum.Add(&xx, &yy) + + p.X.Double(&xy).Div(&p.X, &denum) + + two.SetOne().Double(&two) + denum.Neg(&denum).Add(&denum, &two) + + p.Y.Sub(&yy, &xx).Div(&p.Y, &denum) + + return p +} + +// Neg negates point (x,y) on a twisted Edwards curve with parameters a, d +// modifies p +func (p *PointProj) Neg(p1 *PointProj) *PointProj { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// FromProj sets p in affine from p in projective +func (p *PointAffine) FromProj(p1 *PointProj) *PointAffine { + p.X.Div(&p1.X, &p1.Z) + p.Y.Div(&p1.Y, &p1.Z) + return p +} + +// FromAffine sets p in projective from p in affine +func (p *PointProj) FromAffine(p1 *PointAffine) *PointProj { + p.X.Set(&p1.X) + p.Y.Set(&p1.Y) + p.Z.SetOne() + return p +} + +// Add adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-add-2008-bbjlp +func (p *PointProj) Add(p1, p2 *PointProj) 
*PointProj { + + var res PointProj + + ecurve := GetEdwardsCurve() + + var A, B, C, D, E, F, G, H, I fr.Element + A.Mul(&p1.Z, &p2.Z) + B.Square(&A) + C.Mul(&p1.X, &p2.X) + D.Mul(&p1.Y, &p2.Y) + E.Mul(&ecurve.D, &C).Mul(&E, &D) + F.Sub(&B, &E) + G.Add(&B, &E) + H.Add(&p1.X, &p1.Y) + I.Add(&p2.X, &p2.Y) + res.X.Mul(&H, &I). + Sub(&res.X, &C). + Sub(&res.X, &D). + Mul(&res.X, &A). + Mul(&res.X, &F) + mulByA(&C) + C.Neg(&C) + res.Y.Add(&D, &C). + Mul(&res.Y, &A). + Mul(&res.Y, &G) + res.Z.Mul(&F, &G) + + p.Set(&res) + return p +} + +// MixedAdd adds a point in projective to a point in affine coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#addition-madd-2008-bbjlp +func (p *PointProj) MixedAdd(p1 *PointProj, p2 *PointAffine) *PointProj { + + var res PointProj + + ecurve := GetEdwardsCurve() + + var B, C, D, E, F, G, H, I fr.Element + B.Square(&p1.Z) + C.Mul(&p1.X, &p2.X) + D.Mul(&p1.Y, &p2.Y) + E.Mul(&ecurve.D, &C).Mul(&E, &D) + F.Sub(&B, &E) + G.Add(&B, &E) + H.Add(&p1.X, &p1.Y) + I.Add(&p2.X, &p2.Y) + res.X.Mul(&H, &I). + Sub(&res.X, &C). + Sub(&res.X, &D). + Mul(&res.X, &p1.Z). + Mul(&res.X, &F) + mulByA(&C) + res.Y.Sub(&D, &C). + Mul(&res.Y, &p1.Z). + Mul(&res.Y, &G) + res.Z.Mul(&F, &G) + + p.Set(&res) + return p +} + +// Double adds points in projective coordinates +// cf https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html#doubling-dbl-2008-bbjlp +func (p *PointProj) Double(p1 *PointProj) *PointProj { + + var res PointProj + + var B, C, D, E, F, H, J fr.Element + + B.Add(&p1.X, &p1.Y).Square(&B) + C.Square(&p1.X) + D.Square(&p1.Y) + E.Set(&C) + mulByA(&E) + F.Add(&E, &D) + H.Square(&p1.Z) + J.Sub(&F, &H).Sub(&J, &H) + res.X.Sub(&B, &C). + Sub(&res.X, &D). 
+ Mul(&res.X, &J) + res.Y.Sub(&E, &D).Mul(&res.Y, &F) + res.Z.Mul(&F, &J) + + p.Set(&res) + return p +} + +// Neg sets p to -p1 and returns it +func (p *PointAffine) Neg(p1 *PointAffine) *PointAffine { + p.Set(p1) + p.X.Neg(&p.X) + return p +} + +// setInfinity sets p to O (0:1:1) +func (p *PointProj) setInfinity() *PointProj { + p.X.SetZero() + p.Y.SetOne() + p.Z.SetOne() + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in projective coordinates with a scalar in big.Int +func (p *PointProj) ScalarMul(p1 *PointProj, scalar *big.Int) *PointProj { + + var _scalar big.Int + _scalar.Set(scalar) + p.Set(p1) + if _scalar.Sign() == -1 { + _scalar.Neg(&_scalar) + p.Neg(p) + } + var resProj PointProj + resProj.setInfinity() + const wordSize = bits.UintSize + sWords := _scalar.Bits() + + for i := len(sWords) - 1; i >= 0; i-- { + ithWord := sWords[i] + for k := 0; k < wordSize; k++ { + resProj.Double(&resProj) + kthBit := (ithWord >> (wordSize - 1 - k)) & 1 + if kthBit == 1 { + resProj.Add(&resProj, p) + } + } + } + + p.Set(&resProj) + return p +} + +// ScalarMul scalar multiplication of a point +// p1 in affine coordinates with a scalar in big.Int +func (p *PointAffine) ScalarMul(p1 *PointAffine, scalar *big.Int) *PointAffine { + + var p1Proj, resProj PointProj + p1Proj.FromAffine(p1) + resProj.ScalarMul(&p1Proj, scalar) + p.FromProj(&resProj) + + return p +} diff --git a/ecc/bls12-378/twistededwards/twistededwards_test.go b/ecc/bls12-378/twistededwards/twistededwards_test.go new file mode 100644 index 000000000..cb8e64f26 --- /dev/null +++ b/ecc/bls12-378/twistededwards/twistededwards_test.go @@ -0,0 +1,456 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "math/big" + "math/rand" + "testing" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" +) + +// ------------------------------------------------------------ +// tests + +func TestReceiverIsOperand(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + + // affine + properties.Property("Equal affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1 PointAffine + p1.Set(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(¶ms.Base) + }, + )) + + properties.Property("Add affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + p3.Set(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.Set(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the 
receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + p2.Neg(&p1) + p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg affine: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.Set(¶ms.Base) + p2.Set(¶ms.Base) + + var s big.Int + s.SetUint64(10) + + p2.ScalarMul(&p1, &s) + p1.ScalarMul(&p1, &s) + + return p2.Equal(&p1) + }, + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) + + // proj + properties.Property("Equal projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + params := GetEdwardsCurve() + var p1, baseProj PointProj + p1.FromAffine(¶ms.Base) + baseProj.FromAffine(¶ms.Base) + + return p1.Equal(&p1) && p1.Equal(&baseProj) + }, + )) + + properties.Property("Add projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2, p3 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + p3.FromAffine(¶ms.Base) + + res := true + + p3.Add(&p1, &p2) + p1.Add(&p1, &p2) + res = res && p3.Equal(&p1) + + p1.FromAffine(¶ms.Base) + p2.Add(&p1, &p2) + res = res && p2.Equal(&p3) + + return res + }, + )) + + properties.Property("Double projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Double(&p1) + p1.Double(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.Property("Neg projective: having the receiver as operand should output the same result", prop.ForAll( + func() bool { + + params := GetEdwardsCurve() + + var p1, p2 PointProj + p1.FromAffine(¶ms.Base) + p2.FromAffine(¶ms.Base) + + p2.Neg(&p1) 
+ p1.Neg(&p1) + + return p2.Equal(&p1) + }, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestField(t *testing.T) { + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS := GenBigInt() + + properties.Property("MulByA(x) should match Mul(x, curve.A)", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var z1, z2 fr.Element + z1.SetBigInt(&s) + z2.Mul(&z1, ¶ms.A) + mulByA(&z1) + + return z1.Equal(&z2) + }, + genS, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func TestOps(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + parameters.MinSuccessfulTests = 100 + + properties := gopter.NewProperties(parameters) + genS1 := GenBigInt() + genS2 := GenBigInt() + + // affine + properties.Property("(affine) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + p2.Neg(&p1) + + p1.Add(&p1, &p2) + + var one fr.Element + one.SetOne() + + return p1.IsOnCurve() && p1.X.IsZero() && p1.Y.Equal(&one) + }, + genS1, + )) + + properties.Property("(affine) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + p1.ScalarMul(¶ms.Base, &s) + p2.ScalarMul(¶ms.Base, &s) + + p1.Add(&p1, &p2) + p2.Double(&p2) + + return p1.IsOnCurve() && p1.Equal(&p2) && !p1.Equal(&inf) + }, + genS1, + )) + + properties.Property("(affine) [a]P+[b]P = [a+b]P", prop.ForAll( + func(s1, s2 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, p3, inf PointAffine + inf.X.SetZero() + inf.Y.SetZero() + p1.ScalarMul(¶ms.Base, &s1) + p2.ScalarMul(¶ms.Base, &s2) + p3.Set(¶ms.Base) + + p2.Add(&p1, &p2) + + s1.Add(&s1, &s2) + p3.ScalarMul(¶ms.Base, &s1) + + return p2.IsOnCurve() && p3.Equal(&p2) && !p3.Equal(&inf) + }, + genS1, + genS2, + )) + + properties.Property("(affine) [a]P+[-a]P = 
O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2, inf PointAffine + inf.X.SetZero() + inf.Y.SetOne() + p1.ScalarMul(¶ms.Base, &s1) + s1.Neg(&s1) + p2.ScalarMul(¶ms.Base, &s1) + + p2.Add(&p1, &p2) + + return p2.IsOnCurve() && p2.Equal(&inf) + }, + genS1, + )) + + properties.Property("[5]P=[2][2]P+P", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var p1, p2 PointAffine + p1.ScalarMul(¶ms.Base, &s1) + + five := big.NewInt(5) + p2.Double(&p1).Double(&p2).Add(&p2, &p1) + p1.ScalarMul(&p1, five) + + return p2.IsOnCurve() && p2.Equal(&p1) + }, + genS1, + )) + + // proj + properties.Property("(projective) P+(-P)=O", prop.ForAll( + func(s1 big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p1.ScalarMul(&baseProj, &s1) + p2.Neg(&p1) + + p.Add(&p1, &p2) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(projective) P+P=2*P", prop.ForAll( + + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, p1, p2, p PointProj + baseProj.FromAffine(¶ms.Base) + p.ScalarMul(&baseProj, &s) + + p1.Add(&p, &p) + p2.Double(&p) + + return p1.Equal(&p2) + }, + genS1, + )) + + // mixed + properties.Property("(mixed) P+(-P)=O", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, pProj, p PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + pAffine.Neg(&pAffine) + + p.MixedAdd(&pProj, &pAffine) + + return p.X.IsZero() && p.Y.Equal(&p.Z) + }, + genS1, + )) + + properties.Property("(mixed) P+P=2*P", prop.ForAll( + func(s big.Int) bool { + + params := GetEdwardsCurve() + + var baseProj, pProj, p, p2 PointProj + var pAffine PointAffine + baseProj.FromAffine(¶ms.Base) + pProj.ScalarMul(&baseProj, &s) + pAffine.ScalarMul(¶ms.Base, &s) + + p.MixedAdd(&pProj, &pAffine) + p2.Double(&pProj) + + 
return p.Equal(&p2) + }, + genS1, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) + +} + +func TestMarshal(t *testing.T) { + + var point, unmarshalPoint PointAffine + point.Set(&edwards.Base) + for i := 0; i < 20; i++ { + b := point.Marshal() + unmarshalPoint.Unmarshal(b) + if !point.Equal(&unmarshalPoint) { + t.Fatal("error unmarshal(marshal(point))") + } + point.Add(&point, &edwards.Base) + } +} + +// GenBigInt generates a big.Int +// TODO @thomas we use fr size as max bound here +func GenBigInt() gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + var s big.Int + var b [fr.Bytes]byte + _, err := rand.Read(b[:]) + if err != nil { + panic(err) + } + s.SetBytes(b[:]) + genResult := gopter.NewGenResult(s, gopter.NoShrinker) + return genResult + } +} + +// ------------------------------------------------------------ +// benches + +func BenchmarkScalarMul(b *testing.B) { + params := GetEdwardsCurve() + var a PointProj + var s big.Int + a.FromAffine(¶ms.Base) + s.SetString("52435875175126190479447705081859658376581184513", 10) + s.Add(&s, ¶ms.Order) + + var doubleAndAdd PointProj + + b.Run("double and add", func(b *testing.B) { + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } + }) +} diff --git a/ecc/ecc.go b/ecc/ecc.go index 7f5e531c0..cea2fb10d 100644 --- a/ecc/ecc.go +++ b/ecc/ecc.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -// Package ecc provides bls12-381, bls12-377, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). +// Package ecc provides bls12-381, bls12-377, bls12-378, bn254, bw6-761, bls24-315 and bw6-633 elliptic curves implementation (+pairing). 
// // Also // @@ -40,6 +40,7 @@ const ( UNKNOWN ID = iota BN254 BLS12_377 + BLS12_378 BLS12_381 BLS24_315 BW6_761 @@ -48,7 +49,7 @@ const ( // Implemented return the list of curves fully implemented in gnark-crypto func Implemented() []ID { - return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315} + return []ID{BN254, BLS12_377, BLS12_381, BW6_761, BLS24_315, BW6_633, BLS12_378} } func (id ID) String() string { @@ -56,6 +57,8 @@ func (id ID) String() string { switch id { case BLS12_377: return "bls12_377" + case BLS12_378: + return "bls12_378" case BLS12_381: return "bls12_381" case BN254: @@ -78,6 +81,8 @@ func (id ID) Info() Info { switch id { case BLS12_377: return newInfo(&config.BLS12_377) + case BLS12_378: + return newInfo(&config.BLS12_378) case BLS12_381: return newInfo(&config.BLS12_381) case BN254: diff --git a/internal/generator/addchain/1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 b/internal/generator/addchain/1073dce477bd9758c3bc3fda71edd87ff573bf9ed04a00009948a20000000000 new file mode 100644 index 0000000000000000000000000000000000000000..073bdc04b1163323dae29e87d0d3dfeb44c9b73e GIT binary patch literal 2295 zcmXAp0c4eX9EN}IcV`_lGc(7`%*@Qp%*@Qp%*@R0%uaZM`d=dL7|ZScC-?U*rJJGj zKtxUY?;ld~*D5FZ9e=7Qq+wqp@|kVFFRw;kNh0wp#VY2K2^Fa%lG0E{zLcmuRIUnc zSMu&GmntsRyy|0>YD1-}<16Yz4QdG0@vSS>6lzp+D65uGjas?a7HU>IU)vFCQfH`D zU7>b$hw9Z6s!(sJLw%uM^@qAO5GvDPs6|7eehr7ZG!p95XsAtN;lGscjcPnptI5!W zCPGu13QcP|G^3f&tY$-VnhVWqKD3~P(4rPYOIivoYdN%{mC&kILpkL_d$cFCS9?Q~ z+80{WT4-JCp$%>L5$`uV>B8epH~)X`TS~Wl^Puq@GM>Z6bHsR#8qYD~Ic`D&N>BK3 z(s)i8&uQa1V?1Y#=bZ7JH=YZ|bJ2J%8P8?oxnew5jpv&2TsNK@#&gqzMwH(2;kNPI zF`jMXxobT4jOV`bJTRVz#`DN{9vjaS<9TX4&y44}@w_mem&WtTcwQUN8{>IvLY+$A zg?1`^?^_>?=cDm_GM>-I^Tl|+8qYW5`EEQvjOVBE{4$=r3FTFgOM;<6PZ8&0Br-^p zAW@1$84~44R3K4_L>7rEB&w09L82CkIwb0mXh5P7Lqnb>&do@)Akm6M8xrkEbRf}* zL>Cg>Nc14li$ot1{YVTT!DAXE_<4qiVI)S77{$=IXN)s3PD~&%iNsDMrjVFMVg`v> zB<7HqM`8hqMI@GxSVm$6iB%+W$ZuDk&v)LRa7*k%VhxFPBsP%PkHi5aHj&su;vf=- 
zkT{IQ5hRWxaSVy$NSwgXnCB$tQ%Iaf;tUdJkvNC^@156q&KHolh{Po%E+cUTiK|Fl zL*hCTH;}lA#4RLlBXI|bZ6xj@aSw_6NIXE|Arg;}c#OmoB%UJi42kDRyg=e560eYW zjl>&_!Y}w+&hLlbKACWG2Z>CfS|o&Lm0FO}aC^-As}sy}e12B;6!Q zk|arzZjvNPk|arzBuSDaNs@E!^EuP=`9I&^TG`Io|D3hg|EePXHahvAe^37FkEri2 zQQuceXZ_Eni0Yr)oFYHtAJJSQhVxoV?eG81&#T4%N>%yG=2A^7(Ht!ft)*%!)$|h6 zA}>|UbeUD6CEC?&)lp))s$!1nQr#|dU3y&Rxy&yyKNk3}7nWG)(i^=c`l7GI;#gdw zBbJm{8cR#`M}LWd81Tu^VqAfO-*c6*eY>v$(#$&w1L`;;}5?e}ajjbhS z#I_QXFPNb3De42X@xf1AZXt2l8VFb8;QZ z$#pm<*O8oDM{{x=%PG+r_2YRsk(29WPOeiqrK+FKJd=~_Y)-CoIl0c~tar> zOF6kN=j6JQlj~|uu4_3Zx}ttP4>xjh-OR~#D<{|OoLqNua^1}-(G&H1dAOgG>p@Pg zhdH?(<>Y#tlj})NuBSOAx}*Lq56^RQy~xS+GAGxo9FK7Qbspa2s?N+_c^&f zbnq6^WDn2YE^`1-Lav5V4p&UQQRLF`5BL+nQ!KpaFI zLLA23U5g{mM-j&m#}OwGCz<+*I^}#CaRzY~aSm}FaRG4=aS3r5aRqS|agC`j^Sbj5 z#7)F4#BIbK#9hQaO5+Z=@B9Gq5b+4{81V%0l&OFG%=tOu1>z;*72-AG4dN~09pXLW z1L7m%6XG+aam#&i{)+g9_>TC2_=)(1_>E}%$*39{KTO_LYgIF18lnZ!ifBVjN6bLX zM9jj=XRT^?o{i{0bRy;;x)9xnxriRbJj8swQPiph&I=KX5WR>##A3t}#8N~*#j9O4 z;JgemNWQ{U%bivrh7iMu5yVQwC}Iq;isD7ETJ5|Bu@=8Y)T(vP>k%6e8xfljn-SxP z3H*LitF}0AMQlS%;=9@Iv;(mdu?w*qu?Mjiu@AAIsqf~1^FhQR#9_n{#8JdC#BmyL zZPf|qlZaD@(}**OvxsvPZ)DYZ=L?97h)ameh%1Pzh---Jh#QETG+ybdTh6x;cMx}( zdg|^u-$y(^JVZP~JVrc0JViW1JV(4hyhOZ0yhgl1yhXf2yhnUMd_;Ugd`5ghe5LVP zSABE-j`)H2iTH*1ji`~))ZkLlxLcc=ou?sM5Uq$d#B{_A8owAc&2*lH$fv!jUCc&w zkU!r|olbKQU5IYPTtp9I9%4RX0b(Iy5uz8-hgghQf>?^^M+_jAAqEl45i1Zwh+)JC lVkKe}F@{)$SdCbNSc_PPSdZ9%*ofGK*o+uQOdz&U{tunqTR;E+ literal 0 HcmV?d00001 diff --git a/internal/generator/addchain/41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 b/internal/generator/addchain/41cf7391def65d630ef0ff69c7b761ffd5cefe7b4128000265228 new file mode 100644 index 0000000000000000000000000000000000000000..e40bf61a54cf030d9274511ce49ccfa702b4e4ef GIT binary patch literal 1907 zcmXAp4@6yY97n&Ox3naaBuSDaNs=TD|q~FZs(GsYv;aOl9(t70OU{q;f)8@=LaIL%GW1az1w# zP=&nVDpYYOM&QLk;Q*wWvE(qn=PF*SggkDpr4}Pko^Q4TJ_Y z7#h-0XjsFc5sidKH5wYzSZG}1p$ScdCN&wF(o|?#)1ev7gl07xI-(aLIFrJIX 
zbIEuv8_yNvxoSMujOV)X+%TS-#&gSfZX3@X*m#~8 z&r{=hW<1Z0=Y{dC8PB@$yfmIy#`D^E-WbnY<9TO1?~P}}cs7mag9+6u`54--Bx;bTMWPOgdL$Z%pI>5@(P& zi^Mr3&Lgpcp&rjF=L<+&MB)+>myx)F{L@bBD(7oRTu0&t5;u{!g~V+n?jUg&iF-)g zN8$kz50Q9;#A76$An_E5XGlCp;sp|GNUS6A5{Xwxyhh>;5^s@shs1j%HjvoF*#3fl f;QSGZPe^=5;tLXAk@$u?PxL$IA4vQ}VhiIxt!Ze) literal 0 HcmV?d00001 diff --git a/internal/generator/addchain/fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 b/internal/generator/addchain/fbac1059a1346414f2d74903b441e8a10167ad91c408c9a60370d83429275ff3a5fddaa08b0000265228 new file mode 100644 index 0000000000000000000000000000000000000000..83c675ccdee7f4ede5c044d34243d6576b7b41e9 GIT binary patch literal 3158 zcmXBW0f>!#7Qo?i-kIz)O_L@~lB^_2(wC$!Ns?qGJ4uoxD_O}}O_C&8D@j(8BuSDa zNs=TIdA^Djfg|NiIjKmL{N{gLf` z5#!tc>B?;WzRfA?Gyay{k-4O<5v{(z*5|eIf6+AlvOAhlk=@zj&>Kx(H2smIvMx;y zxQvPH$w4(%jf?D8O-@h~)g+h6E>m2lx=f3lmec*$Ga_fW%*>gQvvOAC?3^7rF8_?2 zlXD{H=G@46InO80k6e%oA{XYu$VIs*a&az>`r?;FF3qLUEQ=h@h+LT~ zBUk0B$kn+za!sy@T$^hn*X6p%zFZ%ts!>Q#H}FPgkC)sdcud*14Km=WA+RsHt_arq-pJT9<2T zU8$*cwWij!n#hUSzFvnLHMMTm)Vftu>vm18J2kcL)n#f7nepZL)HML&U)OuM{>s5_Mxc#~gZ)$42t*P~{rq=tKS|4g^eXNP> z&Gx4{e6FeWrKZ-`np)p#YJIP%^`oZN&zf4lYHIzisnx29Y-NW_L*&$2-Oi&BJ&0aJ zAEF;In#h4#1I}X*gNU(+aftDV35bb^Nr=gaDF|QRRJTvdUDV3yh#81K5Hk_85VLUy zx7=T?y2FZ?ikg>-T4OMCgK+2 zHsTKAF5({1xdZMyKR`T0JVHE1JV88V5uHC3O-JX4$-8Q+=|+q~^dNc>eTaUB4 z&7kvG#5lxw#011d#3aOI#1zC-#5BB7w3_M8GZ23uW+G-GW+VPY%t6d0yxKMMoaZAJ zP_Hn}LZ?ND#fT+{rHEySA;faT3c`zEv(kALVl{q?XfBCa8>BW@sW(s`w8ZaLpZ+(F!B`=u@euI{@fh(0@f7h4@f`62@e=V0@fz_4@fPt8@gDI3@e%O}@fq<2@s-YNUGvTP zJK_i8C*l|4H=;#FSBFbO=Wgxlb{>W3LG&W}5dDbJbbc}D8gL$isHeSaP>d!153Y#u A2><{9 literal 0 HcmV?d00001 diff --git a/internal/generator/config/bls12-378.go b/internal/generator/config/bls12-378.go new file mode 100644 index 000000000..4fb9eddfe --- /dev/null +++ b/internal/generator/config/bls12-378.go @@ -0,0 +1,29 @@ +package config + +var BLS12_378 = Curve{ + Name: "bls12-378", + CurvePackage: "bls12378", + 
EnumID: "BLS12_378", + FrModulus: "14883435066912132899950318861128167269793560281114003360875131245101026639873", + FpModulus: "605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", + G1: Point{ + CoordType: "fp.Element", + PointName: "g1", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + }, + G2: Point{ + CoordType: "fptower.E2", + PointName: "g2", + GLV: true, + CofactorCleaning: true, + CRange: defaultCRange(), + Projective: true, + }, +} + +func init() { + addCurve(&BLS12_378) + +} diff --git a/internal/generator/ecc/template/point.go.tmpl b/internal/generator/ecc/template/point.go.tmpl index aa2a6ce76..916263d79 100644 --- a/internal/generator/ecc/template/point.go.tmpl +++ b/internal/generator/ecc/template/point.go.tmpl @@ -520,8 +520,7 @@ func (p *{{ $TJacobian }}) IsOnCurve() bool { return res.IsOnCurve() && res.Z.IsZero() } - {{else if eq .Name "bls12-377"}} - // IsInSubGroup returns true if p is on the r-torsion, false otherwise. 
+ {{else if or (eq .Name "bls12-377") (eq .Name "bls12-378")}} // https://eprint.iacr.org/2021/1130.pdf, sec.4 // psi(p) = u*P func (p *{{ $TJacobian }}) IsInSubGroup() bool { @@ -689,7 +688,7 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { res.ScalarMultiplication(a, &xGen).AddAssign(a) p.Set(&res) return p -{{else if eq .Name "bls12-377"}} +{{else if or (eq .Name "bls12-377") (eq .Name "bls12-378")}} // cf https://eprint.iacr.org/2019/403.pdf, 5 var res {{$TJacobian}} res.ScalarMultiplication(a, &xGen).Neg(&res).AddAssign(a) @@ -815,7 +814,7 @@ func (p *{{$TJacobian}}) ClearCofactor(a *{{$TJacobian}}) *{{$TJacobian}} { return p -{{else if eq .Name "bls12-377"}} +{{else if or (eq .Name "bls12-377") (eq .Name "bls12-378")}} // https://eprint.iacr.org/2017/419.pdf, 4.1 var xg, xxg, res, t G2Jac xg.ScalarMultiplication(a, &xGen) From 967ff63e37db421a72f1beb59ec8c42af242e6e8 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Mon, 20 Dec 2021 13:47:38 +0100 Subject: [PATCH 15/29] fix(bls12-378): set root of unity for FFT --- ecc/bls12-378/fr/fft/domain.go | 7 +++++++ ecc/bls12-378/fr/fft/fft.go | 2 ++ ecc/bls12-378/fr/fft/fft_test.go | 2 ++ ecc/bls12-378/fr/fft/fuzz.go | 2 ++ internal/generator/fft/template/domain.go.tmpl | 7 ++++--- internal/generator/fft/template/imports.go.tmpl | 8 ++++++-- 6 files changed, 23 insertions(+), 5 deletions(-) diff --git a/ecc/bls12-378/fr/fft/domain.go b/ecc/bls12-378/fr/fft/domain.go index 97ec9125e..ac953fafd 100644 --- a/ecc/bls12-378/fr/fft/domain.go +++ b/ecc/bls12-378/fr/fft/domain.go @@ -24,6 +24,10 @@ import ( "runtime" "sync" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + + curve "github.com/consensys/gnark-crypto/ecc/bls12-378" + "github.com/consensys/gnark-crypto/ecc" ) @@ -80,6 +84,9 @@ func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { // generator of the largest 2-adic subgroup var rootOfUnity fr.Element + 
rootOfUnity.SetString("4045585818372166415418670827807793147093034396422209590578257013290761627990") + const maxOrderRoot uint64 = 42 + domain := &Domain{} x := ecc.NextPowerOfTwo(m) domain.Cardinality = uint64(x) diff --git a/ecc/bls12-378/fr/fft/fft.go b/ecc/bls12-378/fr/fft/fft.go index 66f299d78..532ed4e34 100644 --- a/ecc/bls12-378/fr/fft/fft.go +++ b/ecc/bls12-378/fr/fft/fft.go @@ -22,6 +22,8 @@ import ( "github.com/consensys/gnark-crypto/ecc" "github.com/consensys/gnark-crypto/internal/parallel" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" ) // Decimation is used in the FFT call to select decimation in time or in frequency diff --git a/ecc/bls12-378/fr/fft/fft_test.go b/ecc/bls12-378/fr/fft/fft_test.go index c7416fff7..39e067af8 100644 --- a/ecc/bls12-378/fr/fft/fft_test.go +++ b/ecc/bls12-378/fr/fft/fft_test.go @@ -21,6 +21,8 @@ import ( "strconv" "testing" + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" + "github.com/leanovate/gopter" "github.com/leanovate/gopter/gen" "github.com/leanovate/gopter/prop" diff --git a/ecc/bls12-378/fr/fft/fuzz.go b/ecc/bls12-378/fr/fft/fuzz.go index 1c25b2420..8beef8c6b 100644 --- a/ecc/bls12-378/fr/fft/fuzz.go +++ b/ecc/bls12-378/fr/fft/fuzz.go @@ -23,6 +23,8 @@ import ( "bytes" "fmt" "github.com/consensys/gnark-crypto/ecc" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" ) const ( diff --git a/internal/generator/fft/template/domain.go.tmpl b/internal/generator/fft/template/domain.go.tmpl index 7eb038518..269adacb0 100644 --- a/internal/generator/fft/template/domain.go.tmpl +++ b/internal/generator/fft/template/domain.go.tmpl @@ -63,7 +63,10 @@ func NewDomain(m, depth uint64, precomputeReversedTable bool) *Domain { // generator of the largest 2-adic subgroup var rootOfUnity fr.Element - {{if eq .Name "bls12-377"}} + {{if eq .Name "bls12-378"}} + rootOfUnity.SetString("4045585818372166415418670827807793147093034396422209590578257013290761627990") + const maxOrderRoot uint64 = 42 + {{else if eq 
.Name "bls12-377"}} rootOfUnity.SetString("8065159656716812877374967518403273466521432693661810619979959746626482506078") const maxOrderRoot uint64 = 47 {{else if eq .Name "bls12-381"}} @@ -294,5 +297,3 @@ func (d *Domain) ReadFrom(r io.Reader) (int64, error) { return dec.BytesRead(), nil } - - diff --git a/internal/generator/fft/template/imports.go.tmpl b/internal/generator/fft/template/imports.go.tmpl index 70336db5d..f2b26bcc7 100644 --- a/internal/generator/fft/template/imports.go.tmpl +++ b/internal/generator/fft/template/imports.go.tmpl @@ -1,6 +1,8 @@ {{ define "import_fr" }} -{{ if eq .Name "bls12-377"}} +{{ if eq .Name "bls12-378"}} + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +{{ else if eq .Name "bls12-377"}} "github.com/consensys/gnark-crypto/ecc/bls12-377/fr" {{ else if eq .Name "bls12-381"}} "github.com/consensys/gnark-crypto/ecc/bls12-381/fr" @@ -17,7 +19,9 @@ {{end}} {{ define "import_curve" }} -{{if eq .Name "bls12-377"}} +{{if eq .Name "bls12-378"}} + curve "github.com/consensys/gnark-crypto/ecc/bls12-378" +{{else if eq .Name "bls12-377"}} curve "github.com/consensys/gnark-crypto/ecc/bls12-377" {{else if eq .Name "bls12-381"}} curve "github.com/consensys/gnark-crypto/ecc/bls12-381" From 1f0e45f33d86dbddcae9324e31a67c3490cb92ec Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Tue, 21 Dec 2021 16:14:57 +0100 Subject: [PATCH 16/29] build: add bls12-378 to kzg and hash --- hash/hashes.go | 7 +++++++ internal/apicheck_test.go | 5 +++++ kzg/kzg.go | 3 +++ 3 files changed, 15 insertions(+) diff --git a/hash/hashes.go b/hash/hashes.go index 1845ce25a..01b9b86ae 100644 --- a/hash/hashes.go +++ b/hash/hashes.go @@ -21,6 +21,7 @@ import ( "hash" bls377 "github.com/consensys/gnark-crypto/ecc/bls12-377/fr/mimc" + bls378 "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/mimc" bls381 "github.com/consensys/gnark-crypto/ecc/bls12-381/fr/mimc" bls315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/mimc" bn254 
"github.com/consensys/gnark-crypto/ecc/bn254/fr/mimc" @@ -34,6 +35,7 @@ const ( MIMC_BN254 Hash = iota MIMC_BLS12_381 MIMC_BLS12_377 + MIMC_BLS12_378 MIMC_BW6_761 MIMC_BLS24_315 MIMC_BW6_633 @@ -44,6 +46,7 @@ var digestSize = []uint8{ MIMC_BN254: 32, MIMC_BLS12_381: 48, MIMC_BLS12_377: 48, + MIMC_BLS12_378: 48, MIMC_BW6_761: 96, MIMC_BLS24_315: 48, MIMC_BW6_633: 80, @@ -58,6 +61,8 @@ func (m Hash) New(seed string) hash.Hash { return bls381.NewMiMC(seed) case MIMC_BLS12_377: return bls377.NewMiMC(seed) + case MIMC_BLS12_378: + return bls378.NewMiMC(seed) case MIMC_BW6_761: return bw761.NewMiMC(seed) case MIMC_BLS24_315: @@ -78,6 +83,8 @@ func (m Hash) String() string { return "MIMC_BLS381" case MIMC_BLS12_377: return "MIMC_BLS377" + case MIMC_BLS12_378: + return "MIMC_BLS378" case MIMC_BW6_761: return "MIMC_BW761" case MIMC_BLS24_315: diff --git a/internal/apicheck_test.go b/internal/apicheck_test.go index eebf5e3f6..384c138d1 100644 --- a/internal/apicheck_test.go +++ b/internal/apicheck_test.go @@ -2,6 +2,7 @@ package main import ( bls377 "github.com/consensys/gnark-crypto/ecc/bls12-377" + bls378 "github.com/consensys/gnark-crypto/ecc/bls12-378" bls381 "github.com/consensys/gnark-crypto/ecc/bls12-381" "github.com/consensys/gnark-crypto/ecc/bn254" bw761 "github.com/consensys/gnark-crypto/ecc/bw6-761" @@ -14,6 +15,7 @@ var err error var ( gtbls377 bls377.GT + gtbls378 bls378.GT gtbls381 bls381.GT gtbn254 bn254.GT gtbw761 bw761.GT @@ -22,18 +24,21 @@ var ( func init() { // Pair gtbls377, err = bls377.Pair([]bls377.G1Affine{}, []bls377.G2Affine{}) + gtbls378, err = bls378.Pair([]bls378.G1Affine{}, []bls378.G2Affine{}) gtbls381, err = bls381.Pair([]bls381.G1Affine{}, []bls381.G2Affine{}) gtbn254, err = bn254.Pair([]bn254.G1Affine{}, []bn254.G2Affine{}) gtbw761, err = bw761.Pair([]bw761.G1Affine{}, []bw761.G2Affine{}) // MillerLoop gtbls377, err = bls377.MillerLoop([]bls377.G1Affine{}, []bls377.G2Affine{}) + gtbls378, err = bls378.MillerLoop([]bls378.G1Affine{}, 
[]bls378.G2Affine{}) gtbls381, err = bls381.MillerLoop([]bls381.G1Affine{}, []bls381.G2Affine{}) gtbn254, err = bn254.MillerLoop([]bn254.G1Affine{}, []bn254.G2Affine{}) gtbw761, err = bw761.MillerLoop([]bw761.G1Affine{}, []bw761.G2Affine{}) // FinalExp gtbls377 = bls377.FinalExponentiation(>bls377) + gtbls378 = bls378.FinalExponentiation(>bls378) gtbls381 = bls381.FinalExponentiation(>bls381) gtbn254 = bn254.FinalExponentiation(>bn254) gtbw761 = bw761.FinalExponentiation(>bw761) diff --git a/kzg/kzg.go b/kzg/kzg.go index 88738ccae..2da1e58c5 100644 --- a/kzg/kzg.go +++ b/kzg/kzg.go @@ -9,6 +9,7 @@ import ( "github.com/consensys/gnark-crypto/ecc" kzg_bls12377 "github.com/consensys/gnark-crypto/ecc/bls12-377/fr/kzg" + kzg_bls12378 "github.com/consensys/gnark-crypto/ecc/bls12-378/fr/kzg" kzg_bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381/fr/kzg" kzg_bls24315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/kzg" kzg_bn254 "github.com/consensys/gnark-crypto/ecc/bn254/fr/kzg" @@ -29,6 +30,8 @@ func NewSRS(curveID ecc.ID) SRS { return &kzg_bn254.SRS{} case ecc.BLS12_377: return &kzg_bls12377.SRS{} + case ecc.BLS12_378: + return &kzg_bls12378.SRS{} case ecc.BLS12_381: return &kzg_bls12381.SRS{} case ecc.BLS24_315: From b0870d84e429deb8b265ef1629e143b73bdba3dd Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Sat, 25 Dec 2021 19:58:11 +0100 Subject: [PATCH 17/29] feat(bls12-378): add companion twisted edwards to GT-strong BLS12-378 --- .../twistededwards/twistededwards.go | 64 +++++++++++++++++++ signature/signature.go | 1 + 2 files changed, 65 insertions(+) create mode 100644 ecc/bls12-378/twistededwards/twistededwards.go diff --git a/ecc/bls12-378/twistededwards/twistededwards.go b/ecc/bls12-378/twistededwards/twistededwards.go new file mode 100644 index 000000000..e2f98390c --- /dev/null +++ b/ecc/bls12-378/twistededwards/twistededwards.go @@ -0,0 +1,64 @@ +/* +Copyright © 2020 ConsenSys + +Licensed under the Apache License, Version 2.0 (the 
"License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package twistededwards + +import ( + "math/big" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 +type CurveParams struct { + A, D fr.Element // in Montgomery form + Cofactor fr.Element // not in Montgomery form + Order big.Int + Base PointAffine +} + +var edwards CurveParams + +// GetEdwardsCurve returns the twisted Edwards curve on BLS12-378's Fr +func GetEdwardsCurve() CurveParams { + + // copy to keep Order private + var res CurveParams + + res.A.Set(&edwards.A) + res.D.Set(&edwards.D) + res.Cofactor.Set(&edwards.Cofactor) + res.Order.Set(&edwards.Order) + res.Base.Set(&edwards.Base) + + return res +} + +func init() { + + edwards.A.SetString("1169928") + edwards.D.SetString("1169924") + edwards.Cofactor.SetUint64(8).FromMont() + edwards.Order.SetString("1860429383364016612493789857641020908721690454530426945748883177201355593303", 10) + + edwards.Base.X.SetString("4274983589151226901853657690021194631121133716096168671136076068148698830183") + edwards.Base.Y.SetString("9922290044608088599966879240752111513195706854076002240583420830067351093249") +} + +// mulByA multiplies fr.Element by edwards.A +func mulByA(x *fr.Element) { + x.Mul(x, &edwards.A) +} diff --git a/signature/signature.go b/signature/signature.go index 31cceb81b..df823b198 100644 --- a/signature/signature.go +++ b/signature/signature.go @@ -81,6 +81,7 @@ const ( EDDSA_BN254 SignatureScheme = iota EDDSA_BLS12_381 EDDSA_BLS12_377 + EDDSA_BLS12_378 
EDDSA_BW6_761 EDDSA_BLS24_315 EDDSA_BW6_633 From 66860824a40768b67b82d49d839acbc50b28608f Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Sat, 25 Dec 2021 20:06:48 +0100 Subject: [PATCH 18/29] fix: increment maxSignatures --- signature/signature.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signature/signature.go b/signature/signature.go index df823b198..f736ea653 100644 --- a/signature/signature.go +++ b/signature/signature.go @@ -75,7 +75,7 @@ type Signer interface { type SignatureScheme uint -const maxSignatures = 6 +const maxSignatures = 7 const ( EDDSA_BN254 SignatureScheme = iota From 7a2f57b323f98265de03ee009aeef8804fbf19f4 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 31 Dec 2021 19:18:22 +0100 Subject: [PATCH 19/29] perf(bls12-378/tEd): smallest A coeff --- ecc/bls12-378/twistededwards/twistededwards.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ecc/bls12-378/twistededwards/twistededwards.go b/ecc/bls12-378/twistededwards/twistededwards.go index e2f98390c..676838f24 100644 --- a/ecc/bls12-378/twistededwards/twistededwards.go +++ b/ecc/bls12-378/twistededwards/twistededwards.go @@ -49,12 +49,12 @@ func GetEdwardsCurve() CurveParams { func init() { - edwards.A.SetString("1169928") - edwards.D.SetString("1169924") + edwards.A.SetString("16249") + edwards.D.SetString("826857503717340716663906603396009292766308904506333520048618402505612607353") edwards.Cofactor.SetUint64(8).FromMont() edwards.Order.SetString("1860429383364016612493789857641020908721690454530426945748883177201355593303", 10) - edwards.Base.X.SetString("4274983589151226901853657690021194631121133716096168671136076068148698830183") + edwards.Base.X.SetString("6772953896463446981848394912418300623023000177913479948380771331313783560843") edwards.Base.Y.SetString("9922290044608088599966879240752111513195706854076002240583420830067351093249") } From cac5f229fb5d0f9eb7724a82f1858600dbaa4b0c Mon Sep 17 00:00:00 2001 From: Youssef 
El Housni Date: Tue, 18 Jan 2022 16:36:46 +0100 Subject: [PATCH 20/29] build: rebase branch on develop --- ecc/bls12-377/g2.go | 1 - ecc/bls12-378/fp/doc.go | 2 +- ecc/bls12-378/fp/element.go | 10 +++- ecc/bls12-378/fp/element_mul_adx_amd64.s | 4 +- ecc/bls12-378/fp/element_mul_amd64.s | 4 +- ecc/bls12-378/fr/doc.go | 2 +- ecc/bls12-378/fr/element.go | 10 +++- ecc/bls12-378/fr/element_mul_adx_amd64.s | 4 +- ecc/bls12-378/fr/element_mul_amd64.s | 4 +- ecc/bls12-378/fr/fft/fft_test.go | 2 +- ecc/bls12-378/fr/kzg/kzg_test.go | 69 ++++++++++++++++++------ ecc/bls12-378/g1_test.go | 11 ++++ ecc/bls12-378/g2_test.go | 11 ++++ 13 files changed, 103 insertions(+), 31 deletions(-) diff --git a/ecc/bls12-377/g2.go b/ecc/bls12-377/g2.go index 3a317952e..fa4840972 100644 --- a/ecc/bls12-377/g2.go +++ b/ecc/bls12-377/g2.go @@ -371,7 +371,6 @@ func (p *G2Jac) IsOnCurve() bool { return left.Equal(&right) } -// IsInSubGroup returns true if p is on the r-torsion, false otherwise. // https://eprint.iacr.org/2021/1130.pdf, sec.4 // psi(p) = u*P func (p *G2Jac) IsInSubGroup() bool { diff --git a/ecc/bls12-378/fp/doc.go b/ecc/bls12-378/fp/doc.go index dd844b5dc..da38994c4 100644 --- a/ecc/bls12-378/fp/doc.go +++ b/ecc/bls12-378/fp/doc.go @@ -16,7 +16,7 @@ // Package fp contains field arithmetic operations for modulus = 0x3eeb04...000001. // -// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@gnark/modular_multiplication) // // The modulus is hardcoded in all the operations. 
// diff --git a/ecc/bls12-378/fp/element.go b/ecc/bls12-378/fp/element.go index 69e071293..3619dc7ef 100644 --- a/ecc/bls12-378/fp/element.go +++ b/ecc/bls12-378/fp/element.go @@ -155,6 +155,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case Element: return z.Set(&c1), nil case *Element: + if c1 == nil { + return nil, errors.New("can't set fp.Element with ") + } return z.Set(c1), nil case uint8: return z.SetUint64(uint64(c1)), nil @@ -179,6 +182,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case string: return z.SetString(c1), nil case *big.Int: + if c1 == nil { + return nil, errors.New("can't set fp.Element with ") + } return z.SetBigInt(c1), nil case big.Int: return z.SetBigInt(&c1), nil @@ -374,14 +380,14 @@ func (z *Element) Halve() { // API with assembly impl // Mul z = x * y mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Mul(x, y *Element) *Element { mul(z, x, y) return z } // Square z = x * x mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Square(x *Element) *Element { mul(z, x, x) return z diff --git a/ecc/bls12-378/fp/element_mul_adx_amd64.s b/ecc/bls12-378/fp/element_mul_adx_amd64.s index a6f902c36..f48f40ac4 100644 --- a/ecc/bls12-378/fp/element_mul_adx_amd64.s +++ b/ecc/bls12-378/fp/element_mul_adx_amd64.s @@ -54,7 +54,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), NOSPLIT, $0-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -571,7 +571,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 TEXT ·fromMont(SB), NOSPLIT, $0-8 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // 
https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bls12-378/fp/element_mul_amd64.s b/ecc/bls12-378/fp/element_mul_amd64.s index 171a75360..daf2546be 100644 --- a/ecc/bls12-378/fp/element_mul_amd64.s +++ b/ecc/bls12-378/fp/element_mul_amd64.s @@ -54,7 +54,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $24-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -585,7 +585,7 @@ TEXT ·fromMont(SB), $8-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bls12-378/fr/doc.go b/ecc/bls12-378/fr/doc.go index 2425cb964..eb7c1ef2a 100644 --- a/ecc/bls12-378/fr/doc.go +++ b/ecc/bls12-378/fr/doc.go @@ -16,7 +16,7 @@ // Package fr contains field arithmetic operations for modulus = 0x20e7b9...000001. // -// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@gnark/modular_multiplication) // // The modulus is hardcoded in all the operations. 
// diff --git a/ecc/bls12-378/fr/element.go b/ecc/bls12-378/fr/element.go index b84a12c1b..37bc51f7a 100644 --- a/ecc/bls12-378/fr/element.go +++ b/ecc/bls12-378/fr/element.go @@ -147,6 +147,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case Element: return z.Set(&c1), nil case *Element: + if c1 == nil { + return nil, errors.New("can't set fr.Element with <nil>") + } return z.Set(c1), nil case uint8: return z.SetUint64(uint64(c1)), nil @@ -171,6 +174,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case string: return z.SetString(c1), nil case *big.Int: + if c1 == nil { + return nil, errors.New("can't set fr.Element with <nil>") + } return z.SetBigInt(c1), nil case big.Int: return z.SetBigInt(&c1), nil @@ -342,14 +348,14 @@ func (z *Element) Halve() { // API with assembly impl // Mul z = x * y mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Mul(x, y *Element) *Element { mul(z, x, y) return z } // Square z = x * x mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Square(x *Element) *Element { mul(z, x, x) return z diff --git a/ecc/bls12-378/fr/element_mul_adx_amd64.s b/ecc/bls12-378/fr/element_mul_adx_amd64.s index 35a9c7b30..c37f0fbc1 100644 --- a/ecc/bls12-378/fr/element_mul_adx_amd64.s +++ b/ecc/bls12-378/fr/element_mul_adx_amd64.s @@ -46,7 +46,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), NOSPLIT, $0-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -323,7 +323,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 TEXT ·fromMont(SB), NOSPLIT, $0-8 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // 
https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bls12-378/fr/element_mul_amd64.s b/ecc/bls12-378/fr/element_mul_amd64.s index 850f72813..1cb26f23e 100644 --- a/ecc/bls12-378/fr/element_mul_amd64.s +++ b/ecc/bls12-378/fr/element_mul_amd64.s @@ -46,7 +46,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $24-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -337,7 +337,7 @@ TEXT ·fromMont(SB), $8-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bls12-378/fr/fft/fft_test.go b/ecc/bls12-378/fr/fft/fft_test.go index 39e067af8..eb7e4585c 100644 --- a/ecc/bls12-378/fr/fft/fft_test.go +++ b/ecc/bls12-378/fr/fft/fft_test.go @@ -377,7 +377,7 @@ func BenchmarkFFTDITCosetReference(b *testing.B) { pol[i] = pol[i-1] } - domain := NewDomain(maxSize, 0, false) + domain := NewDomain(maxSize, 1, false) b.ResetTimer() for j := 0; j < b.N; j++ { diff --git a/ecc/bls12-378/fr/kzg/kzg_test.go b/ecc/bls12-378/fr/kzg/kzg_test.go index 837f2c305..914767d40 100644 --- a/ecc/bls12-378/fr/kzg/kzg_test.go +++ b/ecc/bls12-378/fr/kzg/kzg_test.go @@ -174,11 +174,23 @@ func TestVerifySinglePoint(t *testing.T) { t.Fatal(err) } - // verify wrong proof - proof.ClaimedValue.Double(&proof.ClaimedValue) - err = Verify(&digest, &proof, testSRS) - if err == nil { - t.Fatal("verifying wrong proof should have failed") + { + // verify wrong proof + proof.ClaimedValue.Double(&proof.ClaimedValue) + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + } + { + // verify wrong proof 
with quotient set to zero + // see https://cryptosubtlety.medium.com/00-8d4adcf4d255 + proof.H.X.SetZero() + proof.H.Y.SetZero() + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } } } @@ -224,11 +236,23 @@ func TestBatchVerifySinglePoint(t *testing.T) { t.Fatal(err) } - // verify wrong proof - proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) - err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) - if err == nil { - t.Fatal("verifying wrong proof should have failed") + { + // verify wrong proof + proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + } + { + // verify wrong proof with quotient set to zero + // see https://cryptosubtlety.medium.com/00-8d4adcf4d255 + proof.H.X.SetZero() + proof.H.Y.SetZero() + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } } } @@ -282,11 +306,26 @@ func TestBatchVerifyMultiPoints(t *testing.T) { t.Fatal(err) } - // batch verify tampered folded proofs - proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) - err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) - if err == nil { - t.Fatal(err) + { + // batch verify tampered folded proofs + proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) + + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } + } + { + // batch verify tampered folded proofs with quotients set to infinity + // see https://cryptosubtlety.medium.com/00-8d4adcf4d255 + proofs[0].H.X.SetZero() + proofs[0].H.Y.SetZero() + proofs[1].H.X.SetZero() + proofs[1].H.Y.SetZero() + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } } } diff --git a/ecc/bls12-378/g1_test.go b/ecc/bls12-378/g1_test.go index 13346156b..89a1f03c6 
100644 --- a/ecc/bls12-378/g1_test.go +++ b/ecc/bls12-378/g1_test.go @@ -124,6 +124,17 @@ func TestG1AffineIsOnCurve(t *testing.T) { GenFp(), )) + properties.Property("[BLS12-378] IsInSubGroup and MulBy subgroup order should be the same", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G1Jac + op1 = fuzzJacobianG1Affine(&g1Gen, a) + _r := fr.Modulus() + op2.ScalarMultiplication(&op1, _r) + return op1.IsInSubGroup() && op2.Z.IsZero() + }, + GenFp(), + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) } diff --git a/ecc/bls12-378/g2_test.go b/ecc/bls12-378/g2_test.go index f813c2b39..31f2b27de 100644 --- a/ecc/bls12-378/g2_test.go +++ b/ecc/bls12-378/g2_test.go @@ -138,6 +138,17 @@ func TestG2AffineIsOnCurve(t *testing.T) { GenE2(), )) + properties.Property("[BLS12-378] IsInSubGroup and MulBy subgroup order should be the same", prop.ForAll( + func(a fptower.E2) bool { + var op1, op2 G2Jac + op1 = fuzzJacobianG2Affine(&g2Gen, a) + _r := fr.Modulus() + op2.ScalarMultiplication(&op1, _r) + return op1.IsInSubGroup() && op2.Z.IsZero() + }, + GenE2(), + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) } From bd36aeec8f5beb882ee15af99e7d80d94612b6cc Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Wed, 19 Jan 2022 10:16:40 +0100 Subject: [PATCH 21/29] build: run go generate --- ecc/bw6-756/fp/doc.go | 2 +- ecc/bw6-756/fp/element.go | 10 +++- ecc/bw6-756/fp/element_mul_adx_amd64.s | 4 +- ecc/bw6-756/fp/element_mul_amd64.s | 4 +- ecc/bw6-756/fr/doc.go | 2 +- ecc/bw6-756/fr/element.go | 10 +++- ecc/bw6-756/fr/element_mul_adx_amd64.s | 4 +- ecc/bw6-756/fr/element_mul_amd64.s | 4 +- ecc/bw6-756/fr/fft/fft_test.go | 2 +- ecc/bw6-756/fr/kzg/kzg_test.go | 69 ++++++++++++++++++++------ ecc/bw6-756/g1_test.go | 11 ++++ ecc/bw6-756/g2_test.go | 11 ++++ 12 files changed, 103 insertions(+), 30 deletions(-) diff --git a/ecc/bw6-756/fp/doc.go b/ecc/bw6-756/fp/doc.go index 033a5b2c8..695fcb4a8 100644 --- a/ecc/bw6-756/fp/doc.go +++ 
b/ecc/bw6-756/fp/doc.go @@ -16,7 +16,7 @@ // Package fp contains field arithmetic operations for modulus = 0xf76adb...000001. // -// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@gnark/modular_multiplication) // // The modulus is hardcoded in all the operations. // diff --git a/ecc/bw6-756/fp/element.go b/ecc/bw6-756/fp/element.go index 606e5f9dd..b4176bdb4 100644 --- a/ecc/bw6-756/fp/element.go +++ b/ecc/bw6-756/fp/element.go @@ -179,6 +179,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case Element: return z.Set(&c1), nil case *Element: + if c1 == nil { + return nil, errors.New("can't set fp.Element with <nil>") + } return z.Set(c1), nil case uint8: return z.SetUint64(uint64(c1)), nil @@ -203,6 +206,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case string: return z.SetString(c1), nil case *big.Int: + if c1 == nil { + return nil, errors.New("can't set fp.Element with <nil>") + } return z.SetBigInt(c1), nil case big.Int: return z.SetBigInt(&c1), nil @@ -470,14 +476,14 @@ func (z *Element) Halve() { // API with assembly impl // Mul z = x * y mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Mul(x, y *Element) *Element { mul(z, x, y) return z } // Square z = x * x mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Square(x *Element) *Element { mul(z, x, x) return z diff --git a/ecc/bw6-756/fp/element_mul_adx_amd64.s b/ecc/bw6-756/fp/element_mul_adx_amd64.s index 689a4e512..668d1ae91 100644 --- a/ecc/bw6-756/fp/element_mul_adx_amd64.s +++ 
b/ecc/bw6-756/fp/element_mul_adx_amd64.s @@ -78,7 +78,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $96-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -1868,7 +1868,7 @@ TEXT ·fromMont(SB), $96-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bw6-756/fp/element_mul_amd64.s b/ecc/bw6-756/fp/element_mul_amd64.s index 738f1a4b6..4e78b6f63 100644 --- a/ecc/bw6-756/fp/element_mul_amd64.s +++ b/ecc/bw6-756/fp/element_mul_amd64.s @@ -78,7 +78,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $96-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -1880,7 +1880,7 @@ TEXT ·fromMont(SB), $96-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bw6-756/fr/doc.go b/ecc/bw6-756/fr/doc.go index 215d19b3d..3b1fdd556 100644 --- a/ecc/bw6-756/fr/doc.go +++ b/ecc/bw6-756/fr/doc.go @@ -16,7 +16,7 @@ // Package fr contains field arithmetic operations for modulus = 0x3eeb04...000001. 
// -// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication) +// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@gnark/modular_multiplication) // // The modulus is hardcoded in all the operations. // diff --git a/ecc/bw6-756/fr/element.go b/ecc/bw6-756/fr/element.go index afbec34c8..d5edf485e 100644 --- a/ecc/bw6-756/fr/element.go +++ b/ecc/bw6-756/fr/element.go @@ -155,6 +155,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case Element: return z.Set(&c1), nil case *Element: + if c1 == nil { + return nil, errors.New("can't set fr.Element with <nil>") + } return z.Set(c1), nil case uint8: return z.SetUint64(uint64(c1)), nil @@ -179,6 +182,9 @@ func (z *Element) SetInterface(i1 interface{}) (*Element, error) { case string: return z.SetString(c1), nil case *big.Int: + if c1 == nil { + return nil, errors.New("can't set fr.Element with <nil>") + } return z.SetBigInt(c1), nil case big.Int: return z.SetBigInt(&c1), nil @@ -374,14 +380,14 @@ func (z *Element) Halve() { // API with assembly impl // Mul z = x * y mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Mul(x, y *Element) *Element { mul(z, x, y) return z } // Square z = x * x mod q -// see https://hackmd.io/@zkteam/modular_multiplication +// see https://hackmd.io/@gnark/modular_multiplication func (z *Element) Square(x *Element) *Element { mul(z, x, x) return z diff --git a/ecc/bw6-756/fr/element_mul_adx_amd64.s b/ecc/bw6-756/fr/element_mul_adx_amd64.s index a6f902c36..f48f40ac4 100644 --- a/ecc/bw6-756/fr/element_mul_adx_amd64.s +++ b/ecc/bw6-756/fr/element_mul_adx_amd64.s @@ -54,7 +54,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), NOSPLIT, $0-24 // 
the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -571,7 +571,7 @@ TEXT ·mul(SB), NOSPLIT, $0-24 TEXT ·fromMont(SB), NOSPLIT, $0-8 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bw6-756/fr/element_mul_amd64.s b/ecc/bw6-756/fr/element_mul_amd64.s index 171a75360..daf2546be 100644 --- a/ecc/bw6-756/fr/element_mul_amd64.s +++ b/ecc/bw6-756/fr/element_mul_amd64.s @@ -54,7 +54,7 @@ GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 TEXT ·mul(SB), $24-24 // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // however, to benefit from the ADCX and ADOX carry chains // we split the inner loops in 2: // for i=0 to N-1 @@ -585,7 +585,7 @@ TEXT ·fromMont(SB), $8-8 NO_LOCAL_POINTERS // the algorithm is described here - // https://hackmd.io/@zkteam/modular_multiplication + // https://hackmd.io/@gnark/modular_multiplication // when y = 1 we have: // for i=0 to N-1 // t[i] = x[i] diff --git a/ecc/bw6-756/fr/fft/fft_test.go b/ecc/bw6-756/fr/fft/fft_test.go index 4748e01b9..22afedec4 100644 --- a/ecc/bw6-756/fr/fft/fft_test.go +++ b/ecc/bw6-756/fr/fft/fft_test.go @@ -377,7 +377,7 @@ func BenchmarkFFTDITCosetReference(b *testing.B) { pol[i] = pol[i-1] } - domain := NewDomain(maxSize, 0, false) + domain := NewDomain(maxSize, 1, false) b.ResetTimer() for j := 0; j < b.N; j++ { diff --git a/ecc/bw6-756/fr/kzg/kzg_test.go b/ecc/bw6-756/fr/kzg/kzg_test.go index 9e0757166..e898d881d 100644 --- a/ecc/bw6-756/fr/kzg/kzg_test.go +++ b/ecc/bw6-756/fr/kzg/kzg_test.go @@ -174,11 +174,23 @@ func TestVerifySinglePoint(t *testing.T) { t.Fatal(err) } - // 
verify wrong proof - proof.ClaimedValue.Double(&proof.ClaimedValue) - err = Verify(&digest, &proof, testSRS) - if err == nil { - t.Fatal("verifying wrong proof should have failed") + { + // verify wrong proof + proof.ClaimedValue.Double(&proof.ClaimedValue) + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + } + { + // verify wrong proof with quotient set to zero + // see https://cryptosubtlety.medium.com/00-8d4adcf4d255 + proof.H.X.SetZero() + proof.H.Y.SetZero() + err = Verify(&digest, &proof, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } } } @@ -224,11 +236,23 @@ func TestBatchVerifySinglePoint(t *testing.T) { t.Fatal(err) } - // verify wrong proof - proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) - err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) - if err == nil { - t.Fatal("verifying wrong proof should have failed") + { + // verify wrong proof + proof.ClaimedValues[0].Double(&proof.ClaimedValues[0]) + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } + } + { + // verify wrong proof with quotient set to zero + // see https://cryptosubtlety.medium.com/00-8d4adcf4d255 + proof.H.X.SetZero() + proof.H.Y.SetZero() + err = BatchVerifySinglePoint(digests, &proof, hf, testSRS) + if err == nil { + t.Fatal("verifying wrong proof should have failed") + } } } @@ -282,11 +306,26 @@ func TestBatchVerifyMultiPoints(t *testing.T) { t.Fatal(err) } - // batch verify tampered folded proofs - proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) - err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) - if err == nil { - t.Fatal(err) + { + // batch verify tampered folded proofs + proofs[0].ClaimedValue.Double(&proofs[0].ClaimedValue) + + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } + } + { + // batch verify tampered 
folded proofs with quotients set to infinity + // see https://cryptosubtlety.medium.com/00-8d4adcf4d255 + proofs[0].H.X.SetZero() + proofs[0].H.Y.SetZero() + proofs[1].H.X.SetZero() + proofs[1].H.Y.SetZero() + err = BatchVerifyMultiPoints(foldedDigests, proofs, testSRS) + if err == nil { + t.Fatal(err) + } } } diff --git a/ecc/bw6-756/g1_test.go b/ecc/bw6-756/g1_test.go index 06d5277a6..b5167be3f 100644 --- a/ecc/bw6-756/g1_test.go +++ b/ecc/bw6-756/g1_test.go @@ -124,6 +124,17 @@ func TestG1AffineIsOnCurve(t *testing.T) { GenFp(), )) + properties.Property("[BW6-756] IsInSubGroup and MulBy subgroup order should be the same", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G1Jac + op1 = fuzzJacobianG1Affine(&g1Gen, a) + _r := fr.Modulus() + op2.ScalarMultiplication(&op1, _r) + return op1.IsInSubGroup() && op2.Z.IsZero() + }, + GenFp(), + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) } diff --git a/ecc/bw6-756/g2_test.go b/ecc/bw6-756/g2_test.go index b068b6d3a..23323a5b3 100644 --- a/ecc/bw6-756/g2_test.go +++ b/ecc/bw6-756/g2_test.go @@ -124,6 +124,17 @@ func TestG2AffineIsOnCurve(t *testing.T) { GenFp(), )) + properties.Property("[BW6-756] IsInSubGroup and MulBy subgroup order should be the same", prop.ForAll( + func(a fp.Element) bool { + var op1, op2 G2Jac + op1 = fuzzJacobianG2Affine(&g2Gen, a) + _r := fr.Modulus() + op2.ScalarMultiplication(&op1, _r) + return op1.IsInSubGroup() && op2.Z.IsZero() + }, + GenFp(), + )) + properties.TestingRun(t, gopter.ConsoleReporter(false)) } From 75e9125fe7872d035275f296a904d0f045a7ac62 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Wed, 19 Jan 2022 14:13:15 +0100 Subject: [PATCH 22/29] build: add bw6-633 and bw6-756 to kzg constructor --- kzg/kzg.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kzg/kzg.go b/kzg/kzg.go index 2da1e58c5..55a445c16 100644 --- a/kzg/kzg.go +++ b/kzg/kzg.go @@ -13,6 +13,8 @@ import ( kzg_bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381/fr/kzg" 
kzg_bls24315 "github.com/consensys/gnark-crypto/ecc/bls24-315/fr/kzg" kzg_bn254 "github.com/consensys/gnark-crypto/ecc/bn254/fr/kzg" + kzg_bw6633 "github.com/consensys/gnark-crypto/ecc/bw6-633/fr/kzg" + kzg_bw6756 "github.com/consensys/gnark-crypto/ecc/bw6-756/fr/kzg" kzg_bw6761 "github.com/consensys/gnark-crypto/ecc/bw6-761/fr/kzg" ) @@ -38,6 +40,10 @@ func NewSRS(curveID ecc.ID) SRS { return &kzg_bls24315.SRS{} case ecc.BW6_761: return &kzg_bw6761.SRS{} + case ecc.BW6_633: + return &kzg_bw6633.SRS{} + case ecc.BW6_756: + return &kzg_bw6756.SRS{} default: panic("not implemented") } From 7e3417709e2a26488901d39e5351d8d80ff279a5 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Wed, 16 Mar 2022 13:47:50 +0100 Subject: [PATCH 23/29] fix: twisted curve formulae for GT-strong embedded curve (a != -1) --- ecc/bls12-378/fp/element.go | 39 ++++++++------ ecc/bls12-378/fp/element_test.go | 8 +++ ecc/bls12-378/fr/element.go | 39 ++++++++------ ecc/bls12-378/fr/element_test.go | 8 +++ ecc/bls12-378/fr/fft/domain.go | 1 - ecc/bls12-378/internal/fptower/e12.go | 15 ++++++ ecc/bls12-378/internal/fptower/e12_test.go | 22 ++++++++ ecc/bls12-378/internal/fptower/e2.go | 21 ++++++-- ecc/bls12-378/internal/fptower/e2_test.go | 22 ++++++++ ecc/bls12-378/internal/fptower/e6.go | 16 ++++++ ecc/bls12-378/internal/fptower/e6_test.go | 22 ++++++++ ecc/bls12-378/multiexp.go | 2 +- ecc/bls12-378/twistededwards/point.go | 53 +++++++++---------- .../template/pointtwistededwards.go.tmpl | 48 ++++++++++++++++- 14 files changed, 249 insertions(+), 67 deletions(-) diff --git a/ecc/bls12-378/fp/element.go b/ecc/bls12-378/fp/element.go index 4cc255e92..2d8c4bfb6 100644 --- a/ecc/bls12-378/fp/element.go +++ b/ecc/bls12-378/fp/element.go @@ -250,8 +250,27 @@ func (z *Element) IsZero() bool { return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 } +// IsOne returns z == 1 +func (z *Element) IsOne() bool { + return (z[5] ^ 28498675542444634 | z[4] ^ 13356930855120736188 | z[3] ^ 
8832319421896135475 | z[2] ^ 7242180086616818316 | z[1] ^ 10045892448872562649 | z[0] ^ 1481365419032838079) == 0 +} + // IsUint64 reports whether z can be represented as an uint64. func (z *Element) IsUint64() bool { + zz := *z + zz.FromMont() + return zz.FitsOnOneWord() +} + +// Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. +func (z *Element) Uint64() uint64 { + zz := *z + zz.FromMont() + return zz[0] +} + +// FitsOnOneWord reports whether z words (except the least significant word) are 0 +func (z *Element) FitsOnOneWord() bool { return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 } @@ -961,13 +980,13 @@ func (z *Element) Text(base int) string { } zz := *z zz.FromMont() - if zz.IsUint64() { + if zz.FitsOnOneWord() { return strconv.FormatUint(zz[0], base) } else if base == 10 { var zzNeg Element zzNeg.Neg(z) zzNeg.FromMont() - if zzNeg.IsUint64() { + if zzNeg.FitsOnOneWord() { return "-" + strconv.FormatUint(zzNeg[0], base) } } @@ -1295,6 +1314,7 @@ const invIterationsN = 26 // Implements "Optimized Binary GCD for Modular Inversion" // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf func (z *Element) Inverse(x *Element) *Element { + a := *x b := Element{ qElementWord0, @@ -1740,18 +1760,3 @@ func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int6 return yHi } - -func (z *Element) EvalPolynomial(monic bool, coefficients []Element, x *Element) { - dst := coefficients[len(coefficients)-1] - - if monic { - dst.Add(&dst, x) - } - - for i := len(coefficients) - 2; i >= 0; i-- { - dst.Mul(&dst, x) - dst.Add(&dst, &coefficients[i]) - } - - *z = dst -} diff --git a/ecc/bls12-378/fp/element_test.go b/ecc/bls12-378/fp/element_test.go index 814d9e68f..9c5f2a1a3 100644 --- a/ecc/bls12-378/fp/element_test.go +++ b/ecc/bls12-378/fp/element_test.go @@ -2450,6 +2450,14 @@ func TestElementMontNegMultipleOfR(t *testing.T) { } } +func TestElement0Inverse(t *testing.T) { + var x Element + 
x.Inverse(&x) + if !x.IsZero() { + t.Fail() + } +} + //TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen func TestUpdateFactorSubtraction(t *testing.T) { for i := 0; i < 1000; i++ { diff --git a/ecc/bls12-378/fr/element.go b/ecc/bls12-378/fr/element.go index 615890e27..eacdff665 100644 --- a/ecc/bls12-378/fr/element.go +++ b/ecc/bls12-378/fr/element.go @@ -238,8 +238,27 @@ func (z *Element) IsZero() bool { return (z[3] | z[2] | z[1] | z[0]) == 0 } +// IsOne returns z == 1 +func (z *Element) IsOne() bool { + return (z[3] ^ 1849268063235586341 | z[2] ^ 5455128044303689984 | z[1] ^ 10640745125853265911 | z[0] ^ 11387109765248188409) == 0 +} + // IsUint64 reports whether z can be represented as an uint64. func (z *Element) IsUint64() bool { + zz := *z + zz.FromMont() + return zz.FitsOnOneWord() +} + +// Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. +func (z *Element) Uint64() uint64 { + zz := *z + zz.FromMont() + return zz[0] +} + +// FitsOnOneWord reports whether z words (except the least significant word) are 0 +func (z *Element) FitsOnOneWord() bool { return (z[3] | z[2] | z[1]) == 0 } @@ -789,13 +808,13 @@ func (z *Element) Text(base int) string { } zz := *z zz.FromMont() - if zz.IsUint64() { + if zz.FitsOnOneWord() { return strconv.FormatUint(zz[0], base) } else if base == 10 { var zzNeg Element zzNeg.Neg(z) zzNeg.FromMont() - if zzNeg.IsUint64() { + if zzNeg.FitsOnOneWord() { return "-" + strconv.FormatUint(zzNeg[0], base) } } @@ -1115,6 +1134,7 @@ const invIterationsN = 18 // Implements "Optimized Binary GCD for Modular Inversion" // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf func (z *Element) Inverse(x *Element) *Element { + a := *x b := Element{ qElementWord0, @@ -1484,18 +1504,3 @@ func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int6 return yHi } - -func (z *Element) EvalPolynomial(monic bool, 
coefficients []Element, x *Element) { - dst := coefficients[len(coefficients)-1] - - if monic { - dst.Add(&dst, x) - } - - for i := len(coefficients) - 2; i >= 0; i-- { - dst.Mul(&dst, x) - dst.Add(&dst, &coefficients[i]) - } - - *z = dst -} diff --git a/ecc/bls12-378/fr/element_test.go b/ecc/bls12-378/fr/element_test.go index 92fe5199d..ee5fdf733 100644 --- a/ecc/bls12-378/fr/element_test.go +++ b/ecc/bls12-378/fr/element_test.go @@ -2418,6 +2418,14 @@ func TestElementMontNegMultipleOfR(t *testing.T) { } } +func TestElement0Inverse(t *testing.T) { + var x Element + x.Inverse(&x) + if !x.IsZero() { + t.Fail() + } +} + //TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen func TestUpdateFactorSubtraction(t *testing.T) { for i := 0; i < 1000; i++ { diff --git a/ecc/bls12-378/fr/fft/domain.go b/ecc/bls12-378/fr/fft/domain.go index 42b5bb230..522dd3ca5 100644 --- a/ecc/bls12-378/fr/fft/domain.go +++ b/ecc/bls12-378/fr/fft/domain.go @@ -76,7 +76,6 @@ func NewDomain(m uint64) *Domain { const maxOrderRoot uint64 = 42 domain.FrMultiplicativeGen.SetUint64(22) - domain.FrMultiplicativeGen.SetUint64(5) domain.FrMultiplicativeGenInv.Inverse(&domain.FrMultiplicativeGen) // find generator for Z/2^(log(m))Z diff --git a/ecc/bls12-378/internal/fptower/e12.go b/ecc/bls12-378/internal/fptower/e12.go index aea14150d..07c716fbe 100644 --- a/ecc/bls12-378/internal/fptower/e12.go +++ b/ecc/bls12-378/internal/fptower/e12.go @@ -559,3 +559,18 @@ func (z *E12) IsInSubGroup() bool { return a.Equal(&b) } + +func (z *E12) Select(cond int, caseZ *E12, caseNz *E12) *E12 { + //Might be able to save a nanosecond or two by an aggregate implementation + + z.C0.Select(cond, &caseZ.C0, &caseNz.C0) + z.C1.Select(cond, &caseZ.C1, &caseNz.C1) + + return z +} + +func (z *E12) Div(x *E12, y *E12) *E12 { + var r E12 + r.Inverse(y).Mul(x, &r) + return z.Set(&r) +} diff --git a/ecc/bls12-378/internal/fptower/e12_test.go 
b/ecc/bls12-378/internal/fptower/e12_test.go index 939f945bf..6f5a6ee74 100644 --- a/ecc/bls12-378/internal/fptower/e12_test.go +++ b/ecc/bls12-378/internal/fptower/e12_test.go @@ -490,3 +490,25 @@ func BenchmarkE12Expt(b *testing.B) { a.Expt(&a) } } + +func TestE12Div(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + properties := gopter.NewProperties(parameters) + + genA := GenE12() + genB := GenE12() + + properties.Property("[BLS12-378] dividing then multiplying by the same element does nothing", prop.ForAll( + func(a, b *E12) bool { + var c E12 + c.Div(a, b) + c.Mul(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} diff --git a/ecc/bls12-378/internal/fptower/e2.go b/ecc/bls12-378/internal/fptower/e2.go index ff630a714..18dc7a9b4 100644 --- a/ecc/bls12-378/internal/fptower/e2.go +++ b/ecc/bls12-378/internal/fptower/e2.go @@ -26,7 +26,7 @@ type E2 struct { A0, A1 fp.Element } -// Equal returns true if z equals x, fasle otherwise +// Equal returns true if z equals x, false otherwise func (z *E2) Equal(x *E2) bool { return z.A0.Equal(&x.A0) && z.A1.Equal(&x.A1) } @@ -93,7 +93,7 @@ func (z *E2) SetRandom() (*E2, error) { return z, nil } -// IsZero returns true if the two elements are equal, fasle otherwise +// IsZero returns true if the two elements are equal, false otherwise func (z *E2) IsZero() bool { return z.A0.IsZero() && z.A1.IsZero() } @@ -124,7 +124,7 @@ func (z *E2) Neg(x *E2) *E2 { // String implements Stringer interface for fancy printing func (z *E2) String() string { - return (z.A0.String() + "+" + z.A1.String() + "*u") + return z.A0.String() + "+" + z.A1.String() + "*u" } // ToMont converts to mont form @@ -260,3 +260,18 @@ func BatchInvert(a []E2) []E2 { return res } + +func (z *E2) Select(cond int, caseZ *E2, caseNz *E2) *E2 { + //Might be able to save a nanosecond or two by an aggregate implementation + + z.A0.Select(cond, &caseZ.A0, &caseNz.A0) + z.A1.Select(cond, 
&caseZ.A1, &caseNz.A1) + + return z +} + +func (z *E2) Div(x *E2, y *E2) *E2 { + var r E2 + r.Inverse(y).Mul(x, &r) + return z.Set(&r) +} diff --git a/ecc/bls12-378/internal/fptower/e2_test.go b/ecc/bls12-378/internal/fptower/e2_test.go index 0c3f7e257..cd07f4856 100644 --- a/ecc/bls12-378/internal/fptower/e2_test.go +++ b/ecc/bls12-378/internal/fptower/e2_test.go @@ -504,3 +504,25 @@ func BenchmarkE2Conjugate(b *testing.B) { a.Conjugate(&a) } } + +func TestE2Div(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + properties := gopter.NewProperties(parameters) + + genA := GenE2() + genB := GenE2() + + properties.Property("[BLS12-378] dividing then multiplying by the same element does nothing", prop.ForAll( + func(a, b *E2) bool { + var c E2 + c.Div(a, b) + c.Mul(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} diff --git a/ecc/bls12-378/internal/fptower/e6.go b/ecc/bls12-378/internal/fptower/e6.go index adc33ceef..2ef96c129 100644 --- a/ecc/bls12-378/internal/fptower/e6.go +++ b/ecc/bls12-378/internal/fptower/e6.go @@ -262,3 +262,19 @@ func (z *E6) Inverse(x *E6) *E6 { return z } + +func (z *E6) Select(cond int, caseZ *E6, caseNz *E6) *E6 { + //Might be able to save a nanosecond or two by an aggregate implementation + + z.B0.Select(cond, &caseZ.B0, &caseNz.B0) + z.B1.Select(cond, &caseZ.B1, &caseNz.B1) + z.B2.Select(cond, &caseZ.B2, &caseNz.B2) + + return z +} + +func (z *E6) Div(x *E6, y *E6) *E6 { + var r E6 + r.Inverse(y).Mul(x, &r) + return z.Set(&r) +} diff --git a/ecc/bls12-378/internal/fptower/e6_test.go b/ecc/bls12-378/internal/fptower/e6_test.go index b6d418d30..3f759a3c2 100644 --- a/ecc/bls12-378/internal/fptower/e6_test.go +++ b/ecc/bls12-378/internal/fptower/e6_test.go @@ -315,3 +315,25 @@ func BenchmarkE6Inverse(b *testing.B) { a.Inverse(&a) } } + +func TestE6Div(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + properties := 
gopter.NewProperties(parameters) + + genA := GenE6() + genB := GenE6() + + properties.Property("[BLS12-378] dividing then multiplying by the same element does nothing", prop.ForAll( + func(a, b *E6) bool { + var c E6 + c.Div(a, b) + c.Mul(&c, b) + return c.Equal(a) + }, + genA, + genB, + )) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} diff --git a/ecc/bls12-378/multiexp.go b/ecc/bls12-378/multiexp.go index e9203d0d0..5f00d4223 100644 --- a/ecc/bls12-378/multiexp.go +++ b/ecc/bls12-378/multiexp.go @@ -95,7 +95,7 @@ func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks if scalarsMont { scalar.FromMont() } - if scalar.IsUint64() { + if scalar.FitsOnOneWord() { // everything is 0, no need to process this scalar if scalar[0] == 0 { continue diff --git a/ecc/bls12-378/twistededwards/point.go b/ecc/bls12-378/twistededwards/point.go index 6a4befe08..e49461298 100644 --- a/ecc/bls12-378/twistededwards/point.go +++ b/ecc/bls12-378/twistededwards/point.go @@ -479,7 +479,7 @@ func (p *PointExtended) FromAffine(p1 *PointAffine) *PointExtended { // Add adds points in extended coordinates // dedicated addition -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-4 +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { if p1.Equal(p2) { @@ -488,19 +488,18 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } var A, B, C, D, E, F, G, H, tmp fr.Element - tmp.Add(&p2.X, &p2.Y) - A.Sub(&p1.Y, &p1.X). - Mul(&A, &tmp) - tmp.Add(&p1.X, &p1.Y) - B.Sub(&p2.Y, &p2.X). - Mul(&B, &tmp) - C.Mul(&p1.Z, &p2.T). - Double(&C) - D.Mul(&p2.Z, &p1.T). - Double(&D) + A.Mul(&p1.X, &p2.X) + B.Mul(&p1.Y, &p2.Y) + C.Mul(&p1.Z, &p2.T) + D.Mul(&p1.T, &p2.Z) E.Add(&D, &C) - F.Sub(&B, &A) - G.Add(&B, &A) + tmp.Sub(&p1.X, &p1.Y) + F.Add(&p2.X, &p2.Y). + Mul(&F, &tmp). + Add(&F, &B). 
+ Sub(&F, &A) + mulByA(&A) + G.Add(&A, &B) H.Sub(&D, &C) p.X.Mul(&E, &F) @@ -512,7 +511,7 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } // MixedAdd adds a point in extended coordinates to a point in affine coordinates -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-madd-2008-hwcd-4 +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExtended { var A, B, C, D, E, F, G, H, tmp fr.Element @@ -525,19 +524,19 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten return p } - tmp.Add(&p2.X, &p2.Y) - A.Sub(&p1.Y, &p1.X). - Mul(&A, &tmp) - tmp.Add(&p1.X, &p1.Y) - B.Sub(&p2.Y, &p2.X). - Mul(&B, &tmp) + A.Mul(&p1.X, &p2.X) + B.Mul(&p1.Y, &p2.Y) C.Mul(&p1.Z, &p2.X). - Mul(&C, &p2.Y). - Double(&C) - D.Double(&p1.T) + Mul(&C, &p2.Y) + D.Set(&p1.T) E.Add(&D, &C) - F.Sub(&B, &A) - G.Add(&B, &A) + tmp.Sub(&p1.X, &p1.Y) + F.Add(&p2.X, &p2.Y). + Mul(&F, &tmp). + Add(&F, &B). 
+ Sub(&F, &A) + mulByA(&A) + G.Add(&A, &B) H.Sub(&D, &C) p.X.Mul(&F, &E) @@ -550,7 +549,7 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten // Double adds points in extended coordinates // Dedicated doubling -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-dbl-2008-hwcd func (p *PointExtended) Double(p1 *PointExtended) *PointExtended { var A, B, C, D, E, F, G, H fr.Element @@ -579,7 +578,7 @@ func (p *PointExtended) Double(p1 *PointExtended) *PointExtended { // MixedDouble adds points in extended coordinates // Dedicated mixed doubling -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-mdbl-2008-hwcd +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-mdbl-2008-hwcd func (p *PointExtended) MixedDouble(p1 *PointExtended) *PointExtended { var A, B, D, E, G, H, two fr.Element diff --git a/internal/generator/edwards/template/pointtwistededwards.go.tmpl b/internal/generator/edwards/template/pointtwistededwards.go.tmpl index 7516de1ca..6e8d3eafd 100644 --- a/internal/generator/edwards/template/pointtwistededwards.go.tmpl +++ b/internal/generator/edwards/template/pointtwistededwards.go.tmpl @@ -461,7 +461,11 @@ func (p *PointExtended) FromAffine(p1 *PointAffine) *PointExtended { // Add adds points in extended coordinates // dedicated addition +{{- if or (eq .Name "bls12-378") (eq .Name "bw6-756")}} +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 +{{- else}} // https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-4 +{{- end}} func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { if p1.Equal(p2) { @@ -470,6 +474,20 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } var A, B, C, D, E, F, G, H, tmp fr.Element + {{- if or (eq .Name "bls12-378") (eq .Name "bw6-756")}} + 
A.Mul(&p1.X, &p2.X) + B.Mul(&p1.Y, &p2.Y) + C.Mul(&p1.Z, &p2.T) + D.Mul(&p1.T, &p2.Z) + E.Add(&D, &C) + tmp.Sub(&p1.X, &p1.Y) + F.Add(&p2.X, &p2.Y). + Mul(&F, &tmp). + Add(&F, &B). + Sub(&F, &A) + mulByA(&A) + G.Add(&A, &B) + {{- else}} tmp.Add(&p2.X, &p2.Y) A.Sub(&p1.Y, &p1.X). Mul(&A, &tmp) @@ -483,6 +501,7 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { E.Add(&D, &C) F.Sub(&B, &A) G.Add(&B, &A) + {{- end}} H.Sub(&D, &C) p.X.Mul(&E, &F) @@ -494,7 +513,11 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } // MixedAdd adds a point in extended coordinates to a point in affine coordinates +{{- if or (eq .Name "bls12-378") (eq .Name "bw6-756")}} +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 +{{- else}} // https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-madd-2008-hwcd-4 +{{- end}} func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExtended { var A, B, C, D, E, F, G, H, tmp fr.Element @@ -506,7 +529,21 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten p.MixedDouble(p1) return p } - +{{ if or (eq .Name "bls12-378") (eq .Name "bw6-756")}} + A.Mul(&p1.X, &p2.X) + B.Mul(&p1.Y, &p2.Y) + C.Mul(&p1.Z, &p2.X). + Mul(&C, &p2.Y) + D.Set(&p1.T) + E.Add(&D, &C) + tmp.Sub(&p1.X, &p1.Y) + F.Add(&p2.X, &p2.Y). + Mul(&F, &tmp). + Add(&F, &B). + Sub(&F, &A) + mulByA(&A) + G.Add(&A, &B) +{{- else}} tmp.Add(&p2.X, &p2.Y) A.Sub(&p1.Y, &p1.X). 
Mul(&A, &tmp) @@ -520,6 +557,7 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten E.Add(&D, &C) F.Sub(&B, &A) G.Add(&B, &A) +{{- end}} H.Sub(&D, &C) p.X.Mul(&F, &E) @@ -532,7 +570,11 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten // Double adds points in extended coordinates // Dedicated doubling +{{- if or (eq .Name "bls12-378") (eq .Name "bw6-756")}} +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-dbl-2008-hwcd +{{- else}} // https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd +{{- end}} func (p *PointExtended) Double(p1 *PointExtended) *PointExtended { var A, B, C, D, E, F, G, H fr.Element @@ -561,7 +603,11 @@ func (p *PointExtended) Double(p1 *PointExtended) *PointExtended { // MixedDouble adds points in extended coordinates // Dedicated mixed doubling +{{- if or (eq .Name "bls12-378") (eq .Name "bw6-756")}} +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-mdbl-2008-hwcd +{{- else}} // https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-mdbl-2008-hwcd +{{- end}} func (p *PointExtended) MixedDouble(p1 *PointExtended) *PointExtended { var A, B, D, E, G, H, two fr.Element From 9062d7e07a69f369826240d89191164916bd9993 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Wed, 16 Mar 2022 13:49:59 +0100 Subject: [PATCH 24/29] build: reran go generate --- ecc/bw6-756/fp/element.go | 39 ++++++++++++--------- ecc/bw6-756/fp/element_test.go | 8 +++++ ecc/bw6-756/fr/element.go | 39 ++++++++++++--------- ecc/bw6-756/fr/element_test.go | 8 +++++ ecc/bw6-756/fr/fft/domain.go | 1 - ecc/bw6-756/multiexp.go | 2 +- ecc/bw6-756/twistededwards/point.go | 53 ++++++++++++++--------------- 7 files changed, 87 insertions(+), 63 deletions(-) diff --git a/ecc/bw6-756/fp/element.go b/ecc/bw6-756/fp/element.go index 4b4500c74..dbbe0f377 100644 --- a/ecc/bw6-756/fp/element.go +++ b/ecc/bw6-756/fp/element.go @@ 
-286,8 +286,27 @@ func (z *Element) IsZero() bool { return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 } +// IsOne returns z == 1 +func (z *Element) IsOne() bool { + return (z[11] ^ 369351476012747 | z[10] ^ 9468215855567529777 | z[9] ^ 3108243834975866807 | z[8] ^ 2055362399696866477 | z[7] ^ 18366804658688562287 | z[6] ^ 8643488375494563078 | z[5] ^ 4799902015386277509 | z[4] ^ 2720419343484222500 | z[3] ^ 12241294279704278364 | z[2] ^ 15160016368967634470 | z[1] ^ 14463961505609547775 | z[0] ^ 18446744073709547378) == 0 +} + // IsUint64 reports whether z can be represented as an uint64. func (z *Element) IsUint64() bool { + zz := *z + zz.FromMont() + return zz.FitsOnOneWord() +} + +// Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. +func (z *Element) Uint64() uint64 { + zz := *z + zz.FromMont() + return zz[0] +} + +// FitsOnOneWord reports whether z words (except the least significant word) are 0 +func (z *Element) FitsOnOneWord() bool { return (z[11] | z[10] | z[9] | z[8] | z[7] | z[6] | z[5] | z[4] | z[3] | z[2] | z[1]) == 0 } @@ -1669,13 +1688,13 @@ func (z *Element) Text(base int) string { } zz := *z zz.FromMont() - if zz.IsUint64() { + if zz.FitsOnOneWord() { return strconv.FormatUint(zz[0], base) } else if base == 10 { var zzNeg Element zzNeg.Neg(z) zzNeg.FromMont() - if zzNeg.IsUint64() { + if zzNeg.FitsOnOneWord() { return "-" + strconv.FormatUint(zzNeg[0], base) } } @@ -2027,6 +2046,7 @@ const invIterationsN = 50 // Implements "Optimized Binary GCD for Modular Inversion" // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf func (z *Element) Inverse(x *Element) *Element { + a := *x b := Element{ qElementWord0, @@ -2748,18 +2768,3 @@ func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int6 return yHi } - -func (z *Element) EvalPolynomial(monic bool, coefficients []Element, x *Element) { - dst := 
coefficients[len(coefficients)-1] - - if monic { - dst.Add(&dst, x) - } - - for i := len(coefficients) - 2; i >= 0; i-- { - dst.Mul(&dst, x) - dst.Add(&dst, &coefficients[i]) - } - - *z = dst -} diff --git a/ecc/bw6-756/fp/element_test.go b/ecc/bw6-756/fp/element_test.go index 0d0ae2fd2..a6cc49c30 100644 --- a/ecc/bw6-756/fp/element_test.go +++ b/ecc/bw6-756/fp/element_test.go @@ -2546,6 +2546,14 @@ func TestElementMontNegMultipleOfR(t *testing.T) { } } +func TestElement0Inverse(t *testing.T) { + var x Element + x.Inverse(&x) + if !x.IsZero() { + t.Fail() + } +} + //TODO: Tests like this (update factor related) are common to all fields. Move them to somewhere non-autogen func TestUpdateFactorSubtraction(t *testing.T) { for i := 0; i < 1000; i++ { diff --git a/ecc/bw6-756/fr/element.go b/ecc/bw6-756/fr/element.go index d94821262..f4209ddbb 100644 --- a/ecc/bw6-756/fr/element.go +++ b/ecc/bw6-756/fr/element.go @@ -250,8 +250,27 @@ func (z *Element) IsZero() bool { return (z[5] | z[4] | z[3] | z[2] | z[1] | z[0]) == 0 } +// IsOne returns z == 1 +func (z *Element) IsOne() bool { + return (z[5] ^ 28498675542444634 | z[4] ^ 13356930855120736188 | z[3] ^ 8832319421896135475 | z[2] ^ 7242180086616818316 | z[1] ^ 10045892448872562649 | z[0] ^ 1481365419032838079) == 0 +} + // IsUint64 reports whether z can be represented as an uint64. func (z *Element) IsUint64() bool { + zz := *z + zz.FromMont() + return zz.FitsOnOneWord() +} + +// Uint64 returns the uint64 representation of x. If x cannot be represented in a uint64, the result is undefined. 
+func (z *Element) Uint64() uint64 { + zz := *z + zz.FromMont() + return zz[0] +} + +// FitsOnOneWord reports whether z words (except the least significant word) are 0 +func (z *Element) FitsOnOneWord() bool { return (z[5] | z[4] | z[3] | z[2] | z[1]) == 0 } @@ -961,13 +980,13 @@ func (z *Element) Text(base int) string { } zz := *z zz.FromMont() - if zz.IsUint64() { + if zz.FitsOnOneWord() { return strconv.FormatUint(zz[0], base) } else if base == 10 { var zzNeg Element zzNeg.Neg(z) zzNeg.FromMont() - if zzNeg.IsUint64() { + if zzNeg.FitsOnOneWord() { return "-" + strconv.FormatUint(zzNeg[0], base) } } @@ -1295,6 +1314,7 @@ const invIterationsN = 26 // Implements "Optimized Binary GCD for Modular Inversion" // https://github.com/pornin/bingcd/blob/main/doc/bingcd.pdf func (z *Element) Inverse(x *Element) *Element { + a := *x b := Element{ qElementWord0, @@ -1740,18 +1760,3 @@ func (z *Element) linearCombNonModular(x *Element, xC int64, y *Element, yC int6 return yHi } - -func (z *Element) EvalPolynomial(monic bool, coefficients []Element, x *Element) { - dst := coefficients[len(coefficients)-1] - - if monic { - dst.Add(&dst, x) - } - - for i := len(coefficients) - 2; i >= 0; i-- { - dst.Mul(&dst, x) - dst.Add(&dst, &coefficients[i]) - } - - *z = dst -} diff --git a/ecc/bw6-756/fr/element_test.go b/ecc/bw6-756/fr/element_test.go index 07949d1ea..98202ef8a 100644 --- a/ecc/bw6-756/fr/element_test.go +++ b/ecc/bw6-756/fr/element_test.go @@ -2450,6 +2450,14 @@ func TestElementMontNegMultipleOfR(t *testing.T) { } } +func TestElement0Inverse(t *testing.T) { + var x Element + x.Inverse(&x) + if !x.IsZero() { + t.Fail() + } +} + //TODO: Tests like this (update factor related) are common to all fields. 
Move them to somewhere non-autogen func TestUpdateFactorSubtraction(t *testing.T) { for i := 0; i < 1000; i++ { diff --git a/ecc/bw6-756/fr/fft/domain.go b/ecc/bw6-756/fr/fft/domain.go index 41e07408d..2cf1957e9 100644 --- a/ecc/bw6-756/fr/fft/domain.go +++ b/ecc/bw6-756/fr/fft/domain.go @@ -76,7 +76,6 @@ func NewDomain(m uint64) *Domain { const maxOrderRoot uint64 = 41 domain.FrMultiplicativeGen.SetUint64(5) - domain.FrMultiplicativeGen.SetUint64(5) domain.FrMultiplicativeGenInv.Inverse(&domain.FrMultiplicativeGen) // find generator for Z/2^(log(m))Z diff --git a/ecc/bw6-756/multiexp.go b/ecc/bw6-756/multiexp.go index dc4306401..2a11889a2 100644 --- a/ecc/bw6-756/multiexp.go +++ b/ecc/bw6-756/multiexp.go @@ -95,7 +95,7 @@ func partitionScalars(scalars []fr.Element, c uint64, scalarsMont bool, nbTasks if scalarsMont { scalar.FromMont() } - if scalar.IsUint64() { + if scalar.FitsOnOneWord() { // everything is 0, no need to process this scalar if scalar[0] == 0 { continue diff --git a/ecc/bw6-756/twistededwards/point.go b/ecc/bw6-756/twistededwards/point.go index 1bdff35b5..fadc596dd 100644 --- a/ecc/bw6-756/twistededwards/point.go +++ b/ecc/bw6-756/twistededwards/point.go @@ -479,7 +479,7 @@ func (p *PointExtended) FromAffine(p1 *PointAffine) *PointExtended { // Add adds points in extended coordinates // dedicated addition -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-4 +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { if p1.Equal(p2) { @@ -488,19 +488,18 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } var A, B, C, D, E, F, G, H, tmp fr.Element - tmp.Add(&p2.X, &p2.Y) - A.Sub(&p1.Y, &p1.X). - Mul(&A, &tmp) - tmp.Add(&p1.X, &p1.Y) - B.Sub(&p2.Y, &p2.X). - Mul(&B, &tmp) - C.Mul(&p1.Z, &p2.T). - Double(&C) - D.Mul(&p2.Z, &p1.T). 
- Double(&D) + A.Mul(&p1.X, &p2.X) + B.Mul(&p1.Y, &p2.Y) + C.Mul(&p1.Z, &p2.T) + D.Mul(&p1.T, &p2.Z) E.Add(&D, &C) - F.Sub(&B, &A) - G.Add(&B, &A) + tmp.Sub(&p1.X, &p1.Y) + F.Add(&p2.X, &p2.Y). + Mul(&F, &tmp). + Add(&F, &B). + Sub(&F, &A) + mulByA(&A) + G.Add(&A, &B) H.Sub(&D, &C) p.X.Mul(&E, &F) @@ -512,7 +511,7 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } // MixedAdd adds a point in extended coordinates to a point in affine coordinates -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-madd-2008-hwcd-4 +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExtended { var A, B, C, D, E, F, G, H, tmp fr.Element @@ -525,19 +524,19 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten return p } - tmp.Add(&p2.X, &p2.Y) - A.Sub(&p1.Y, &p1.X). - Mul(&A, &tmp) - tmp.Add(&p1.X, &p1.Y) - B.Sub(&p2.Y, &p2.X). - Mul(&B, &tmp) + A.Mul(&p1.X, &p2.X) + B.Mul(&p1.Y, &p2.Y) C.Mul(&p1.Z, &p2.X). - Mul(&C, &p2.Y). - Double(&C) - D.Double(&p1.T) + Mul(&C, &p2.Y) + D.Set(&p1.T) E.Add(&D, &C) - F.Sub(&B, &A) - G.Add(&B, &A) + tmp.Sub(&p1.X, &p1.Y) + F.Add(&p2.X, &p2.Y). + Mul(&F, &tmp). + Add(&F, &B). 
+ Sub(&F, &A) + mulByA(&A) + G.Add(&A, &B) H.Sub(&D, &C) p.X.Mul(&F, &E) @@ -550,7 +549,7 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten // Double adds points in extended coordinates // Dedicated doubling -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-dbl-2008-hwcd func (p *PointExtended) Double(p1 *PointExtended) *PointExtended { var A, B, C, D, E, F, G, H fr.Element @@ -579,7 +578,7 @@ func (p *PointExtended) Double(p1 *PointExtended) *PointExtended { // MixedDouble adds points in extended coordinates // Dedicated mixed doubling -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-mdbl-2008-hwcd +// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#doubling-mdbl-2008-hwcd func (p *PointExtended) MixedDouble(p1 *PointExtended) *PointExtended { var A, B, D, E, G, H, two fr.Element From 38033e7447f37371032abd9d6a92a757ac7c6f65 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Thu, 17 Mar 2022 13:19:14 +0100 Subject: [PATCH 25/29] feat: SSWU for BLS12-378 (GT-strong) --- ecc/bls12-378/sswu_g1.go | 334 +++++++++++++++++++++++++ ecc/bls12-378/sswu_g1_test.go | 157 ++++++++++++ internal/generator/config/bls12-378.go | 32 +++ 3 files changed, 523 insertions(+) create mode 100644 ecc/bls12-378/sswu_g1.go create mode 100644 ecc/bls12-378/sswu_g1_test.go diff --git a/ecc/bls12-378/sswu_g1.go b/ecc/bls12-378/sswu_g1.go new file mode 100644 index 000000000..a29f2aca7 --- /dev/null +++ b/ecc/bls12-378/sswu_g1.go @@ -0,0 +1,334 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +//Note: This only works for simple extensions + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + "math/big" +) + +func g1IsogenyXNumerator(dst *fp.Element, x *fp.Element) { + g1EvalPolynomial(dst, + false, + []fp.Element{ + {6470543203100547353, 14665988170059032885, 1939515187828768580, 3603560708219821999, 1095026321559208988, 183854362073986052}, + {14854739450145697573, 10725610731781000990, 8384146146676813741, 3792524792234600218, 10980132992245118504, 127811114313784269}, + {7743341424938057712, 1621446876320497654, 9161388112343345155, 13809747965729688982, 14072110284352226775, 77964074263176954}, + }, + x) +} + +func g1IsogenyXDenominator(dst *fp.Element, x *fp.Element) { + g1EvalPolynomial(dst, + true, + []fp.Element{ + {11480213446153845907, 9569059723295472762, 4133212223950692662, 5656914875334883651, 989021686692303102, 227886835744873894}, + }, + x) +} + +func g1IsogenyYNumerator(dst *fp.Element, x *fp.Element, y *fp.Element) { + var _dst fp.Element + g1EvalPolynomial(&_dst, + false, + []fp.Element{ + {4780610586872381554, 18423150125994475588, 13123772012819260137, 14591853195646969647, 5804992647820306014, 20966723178287685}, + {2213410472918458692, 9774999437004642556, 13319410716190829459, 14774058963459238058, 12357489533274646735, 248758148590692830}, + {16759481071713625783, 8645096532612011693, 7097905075491715268, 932195041550141716, 4227816384078368107, 50037860715544812}, + {3871670712469028856, 10034095475015024635, 
4580694056171672577, 16128246019719620299, 7036055142176113387, 38982037131588477}, + }, + x) + + dst.Mul(&_dst, y) +} + +func g1IsogenyYDenominator(dst *fp.Element, x *fp.Element) { + g1EvalPolynomial(dst, + true, + []fp.Element{ + {17641076928456688137, 8306475833976684855, 8359419817241119003, 12641605213272639883, 9863039736160487870, 55368217170706106}, + {17969127027499495035, 2884196005202455728, 15703691879418613809, 10094567750702434230, 12004334191193297464, 175264043429118194}, + {12350127924441855415, 17380644983358010734, 8933124167467608227, 16391120112507168124, 9337764864048325557, 116945264214095313}, + }, + x) +} + +func g1Isogeny(p *G1Affine) { + + den := make([]fp.Element, 2) + + g1IsogenyYDenominator(&den[1], &p.X) + g1IsogenyXDenominator(&den[0], &p.X) + + g1IsogenyYNumerator(&p.Y, &p.X, &p.Y) + g1IsogenyXNumerator(&p.X, &p.X) + + den = fp.BatchInvert(den) + + p.X.Mul(&p.X, &den[0]) + p.Y.Mul(&p.Y, &den[1]) +} + +// g1SqrtRatio computes the square root of u/v and returns 0 iff u/v was indeed a quadratic residue +// if not, we get sqrt(Z * u / v). Recall that Z is non-residue +// The main idea is that since the computation of the square root involves taking large powers of u/v, the inversion of v can be avoided +func g1SqrtRatio(z *fp.Element, u *fp.Element, v *fp.Element) uint64 { + + // Taken from https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/ F.2.1.1. for any field + + tv1 := fp.Element{3422016347327078217, 15952935974507985473, 10210560017327941857, 6195437588884472512, 1531492004832937820, 17090488542823369} //tv1 = c6 + + var tv2, tv3, tv4, tv5 fp.Element + var exp big.Int + // c4 = 2199023255551 = 2^41 - 1 + // q is odd so c1 is at least 1. 
+ exp.SetBytes([]byte{1, 255, 255, 255, 255, 255}) + + tv2.Exp(*v, &exp) + tv3.Mul(&tv2, &tv2) + tv3.Mul(&tv3, v) + + // line 5 + tv5.Mul(u, &tv3) + + // c3 = 137617509170765099891752579783724504691201148437113468788429769127729045045134922651478473733013131816 + exp.SetBytes([]byte{251, 172, 16, 89, 161, 52, 100, 20, 242, 215, 73, 3, 180, 65, 232, 161, 1, 103, 173, 145, 196, 8, 201, 166, 3, 112, 216, 52, 41, 39, 95, 243, 165, 253, 218, 160, 139, 0, 0, 38, 82, 40}) + tv5.Exp(tv5, &exp) + tv5.Mul(&tv5, &tv2) + tv2.Mul(&tv5, v) + tv3.Mul(&tv5, u) + + // line 10 + tv4.Mul(&tv3, &tv2) + + // c5 = 1099511627776 + exp.SetBytes([]byte{1, 0, 0, 0, 0, 0}) + tv5.Exp(tv4, &exp) + + isQNr := g1NotOne(&tv5) + + tv2.Mul(&tv3, &fp.Element{17614810958234635860, 11393801269165528284, 8781501035240632779, 8106712880529013806, 4971838157288047198, 122121039825317715}) + tv5.Mul(&tv4, &tv1) + + // line 15 + + tv3.Select(int(isQNr), &tv3, &tv2) + tv4.Select(int(isQNr), &tv4, &tv5) + + exp.Lsh(big.NewInt(1), 41-2) + + for i := 41; i >= 2; i-- { + //line 20 + tv5.Exp(tv4, &exp) + nE1 := g1NotOne(&tv5) + + tv2.Mul(&tv3, &tv1) + tv1.Mul(&tv1, &tv1) + tv5.Mul(&tv4, &tv1) + + tv3.Select(int(nE1), &tv3, &tv2) + tv4.Select(int(nE1), &tv4, &tv5) + + exp.Rsh(&exp, 1) + } + + *z = tv3 + return isQNr +} + +/* +// g1SetZ sets z to [11]. 
+func g1SetZ(z *fp.Element) { + z.Set( &fp.Element { 5249763402351377716, 3384457438931451475, 13367120442609335946, 13855353052415766542, 11761008755492169078, 30127809456627797 } ) +}*/ + +// g1MulByZ multiplies x by [11] and stores the result in z +func g1MulByZ(z *fp.Element, x *fp.Element) { + + res := *x + + res.Double(&res) + + res.Double(&res) + + res.Add(&res, x) + + res.Double(&res) + + res.Add(&res, x) + + *z = res +} + +// From https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/ Pg 80 +func g1SswuMap(u *fp.Element) G1Affine { + + var tv1 fp.Element + tv1.Square(u) + + //mul tv1 by Z + g1MulByZ(&tv1, &tv1) + + var tv2 fp.Element + tv2.Square(&tv1) + tv2.Add(&tv2, &tv1) + + var tv3 fp.Element + //Standard doc line 5 + var tv4 fp.Element + tv4.SetOne() + tv3.Add(&tv2, &tv4) + tv3.Mul(&tv3, &fp.Element{10499526804702755432, 6768914877862902950, 8287496811509120276, 9263962031121981469, 5075273437274786541, 60255618913255595}) + + tv2NZero := g1NotZero(&tv2) + + // tv4 = Z + tv4 = fp.Element{5249763402351377716, 3384457438931451475, 13367120442609335946, 13855353052415766542, 11761008755492169078, 30127809456627797} + + tv2.Neg(&tv2) + tv4.Select(int(tv2NZero), &tv4, &tv2) + tv2 = fp.Element{15314533651602404840, 3999629397495592995, 17991228730268553058, 13253234862282888158, 4784493033884022421, 276795783356562829} + tv4.Mul(&tv4, &tv2) + + tv2.Square(&tv3) + + var tv6 fp.Element + //Standard doc line 10 + tv6.Square(&tv4) + + var tv5 fp.Element + tv5.Mul(&tv6, &fp.Element{15314533651602404840, 3999629397495592995, 17991228730268553058, 13253234862282888158, 4784493033884022421, 276795783356562829}) + + tv2.Add(&tv2, &tv5) + tv2.Mul(&tv2, &tv3) + tv6.Mul(&tv6, &tv4) + + //Standards doc line 15 + tv5.Mul(&tv6, &fp.Element{10499526804702755432, 6768914877862902950, 8287496811509120276, 9263962031121981469, 5075273437274786541, 60255618913255595}) + tv2.Add(&tv2, &tv5) + + var x fp.Element + x.Mul(&tv1, &tv3) + + var y1 fp.Element + gx1NSquare 
:= g1SqrtRatio(&y1, &tv2, &tv6) + + var y fp.Element + y.Mul(&tv1, u) + + //Standards doc line 20 + y.Mul(&y, &y1) + + x.Select(int(gx1NSquare), &tv3, &x) + y.Select(int(gx1NSquare), &y1, &y) + + y1.Neg(&y) + y.Select(int(g1Sgn0(u)^g1Sgn0(&y)), &y, &y1) + + //Standards doc line 25 + x.Div(&x, &tv4) + + return G1Affine{x, y} +} + +// EncodeToCurveG1SSWU maps a fp.Element to a point on the curve using the Simplified Shallue and van de Woestijne Ulas map +//https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/#section-6.6.3 +func EncodeToCurveG1SSWU(msg, dst []byte) (G1Affine, error) { + + var res G1Affine + u, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + + res = g1SswuMap(&u[0]) + + //this is in an isogenous curve + g1Isogeny(&res) + + res.ClearCofactor(&res) + + return res, nil +} + +// HashToCurveG1SSWU hashes a byte string to the G1 curve. Usable as a random oracle. +// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG1SSWU(msg, dst []byte) (G1Affine, error) { + u, err := hashToFp(msg, dst, 2*1) + if err != nil { + return G1Affine{}, err + } + + Q0 := g1SswuMap(&u[0]) + Q1 := g1SswuMap(&u[1]) + + //TODO: Add in E' first, then apply isogeny + g1Isogeny(&Q0) + g1Isogeny(&Q1) + + var _Q0, _Q1 G1Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1).AddAssign(&_Q0) + + _Q1.ClearCofactor(&_Q1) + + Q1.FromJacobian(&_Q1) + return Q1, nil +} + +// g1Sgn0 is an algebraic substitute for the notion of sign in ordered fields +// Namely, every non-zero quadratic residue in a finite field of characteristic =/= 2 has exactly two square roots, one of each sign +// Taken from https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/ section 4.1 +// The sign of an element is not obviously related to that of its Montgomery form +func g1Sgn0(z *fp.Element) uint64 { + + nonMont := *z + nonMont.FromMont() + + return nonMont[0] % 2 + +} + +func g1EvalPolynomial(z *fp.Element, monic bool, coefficients 
[]fp.Element, x *fp.Element) { + dst := coefficients[len(coefficients)-1] + + if monic { + dst.Add(&dst, x) + } + + for i := len(coefficients) - 2; i >= 0; i-- { + dst.Mul(&dst, x) + dst.Add(&dst, &coefficients[i]) + } + + z.Set(&dst) +} + +func g1NotZero(x *fp.Element) uint64 { + + return x[0] | x[1] | x[2] | x[3] | x[4] | x[5] + +} + +func g1NotOne(x *fp.Element) uint64 { + + var one fp.Element + return one.SetOne().NotEqual(x) + +} diff --git a/ecc/bls12-378/sswu_g1_test.go b/ecc/bls12-378/sswu_g1_test.go new file mode 100644 index 000000000..b7a3382e5 --- /dev/null +++ b/ecc/bls12-378/sswu_g1_test.go @@ -0,0 +1,157 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bls12378 + +import ( + "github.com/consensys/gnark-crypto/ecc/bls12-378/fp" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + "math/rand" + "testing" +) + +func TestG1SqrtRatio(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + properties := gopter.NewProperties(parameters) + gen := genCoordElemG1(t) + + properties.Property("G1SqrtRatio must square back to the right value", prop.ForAll( + func(u fp.Element, v fp.Element) bool { + + var seen fp.Element + qr := g1SqrtRatio(&seen, &u, &v) == 0 + + seen. + Square(&seen). 
+ Mul(&seen, &v) + + var ref fp.Element + if qr { + ref = u + } else { + g1MulByZ(&ref, &u) + } + + return seen.Equal(&ref) + }, gen, gen)) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func genCoordElemG1(t *testing.T) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomElem := func() fp.Element { + var a fp.Element + + if _, err := a.SetRandom(); err != nil { + t.Error(err) + } + + return a + } + a := genRandomElem() + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func g1TestMatchCoord(t *testing.T, coordName string, msg string, expectedStr string, seen *fp.Element) { + var expected fp.Element + + expected.SetString(expectedStr) + + if !expected.Equal(seen) { + t.Errorf("mismatch on \"%s\", %s:\n\texpected %s\n\tsaw %s", msg, coordName, expected.String(), seen) + } +} + +func g1TestMatch(t *testing.T, c hashTestCase, seen *G1Affine) { + g1TestMatchCoord(t, "x", c.msg, c.x, &seen.X) + g1TestMatchCoord(t, "y", c.msg, c.y, &seen.Y) +} + +func TestEncodeToCurveG1SSWU(t *testing.T) { + + for _, c := range g1EncodeToCurveSSWUVector.cases { + seen, err := EncodeToCurveG1SSWU([]byte(c.msg), g1EncodeToCurveSSWUVector.dst) + if err != nil { + t.Fatal(err) + } + g1TestMatch(t, c, &seen) + } +} + +func TestHashToCurveG1SSWU(t *testing.T) { + for _, c := range g1HashToCurveSSWUVector.cases { + seen, err := HashToCurveG1SSWU([]byte(c.msg), g1HashToCurveSSWUVector.dst) + if err != nil { + t.Fatal(err) + } + g1TestMatch(t, c, &seen) + } + t.Log(len(g1HashToCurveSSWUVector.cases), "cases verified") +} + +func BenchmarkG1EncodeToCurveSSWU(b *testing.B) { + const size = 54 + bytes := make([]byte, size) + dst := g1EncodeToCurveSSWUVector.dst + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + bytes[rand.Int()%size] = byte(rand.Int()) + + if _, err := EncodeToCurveG1SSWU(bytes, dst); err != nil { + b.Fail() + } + } +} + +func BenchmarkG1HashToCurveSSWU(b *testing.B) { + const size = 
54 + bytes := make([]byte, size) + dst := g1HashToCurveSSWUVector.dst + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + bytes[rand.Int()%size] = byte(rand.Int()) + + if _, err := HashToCurveG1SSWU(bytes, dst); err != nil { + b.Fail() + } + } +} + +type hashTestVector struct { + dst []byte + cases []hashTestCase +} + +type hashTestCase struct { + msg string + x string + y string +} + +var g1HashToCurveSSWUVector hashTestVector +var g1EncodeToCurveSSWUVector hashTestVector diff --git a/internal/generator/config/bls12-378.go b/internal/generator/config/bls12-378.go index 4fb9eddfe..3abc1c453 100644 --- a/internal/generator/config/bls12-378.go +++ b/internal/generator/config/bls12-378.go @@ -8,6 +8,7 @@ var BLS12_378 = Curve{ FpModulus: "605248206075306171733248481581800960739847691770924913753520744034740935903401304776283802348837311170974282940417", G1: Point{ CoordType: "fp.Element", + CoordExtDegree: 1, PointName: "g1", GLV: true, CofactorCleaning: true, @@ -15,12 +16,43 @@ var BLS12_378 = Curve{ }, G2: Point{ CoordType: "fptower.E2", + CoordExtDegree: 2, PointName: "g2", GLV: true, CofactorCleaning: true, CRange: defaultCRange(), Projective: true, }, + HashE1: &HashSuite{ + A: []string{"0x3eeb0416684d18f2c41f0ac56b4172c97877b1f2170ca6f42387dd67a2cc5c175e179b1a06ffff79e0723fffffffff2"}, + B: []string{"0x16"}, + Z: []int{11}, + Isogeny: &Isogeny{ + XMap: RationalPolynomial{ + Num: [][]string{ + {"0x2f304310ce39d2c3011a6d50eb4ece730cab541269dbc53c7594241b1c244eff01c0ce03cbe00000000000000000000"}, + {"0x9d9ea03fd9a908c76d1012fb4743eb0c720b5849c7b761ff1e3f31fc34200004ca4510000000001"}, + {"0x2f304310ce39d2c3ed885db0b1cc5b9e3043708b54c1a5cf20a52889c7b761fdaf1f98fe1a1000072f6798000000001"}, + }, + Den: [][]string{ + {"0x2767a80ff66a4231db4404bed1d0fac31c82d61271edd87fc78fcc7f0d0800013291440000000004"}, + }, + }, + YMap: RationalPolynomial{ + Num: [][]string{ + {"0x2f304310ce39d2c3ed885db0b1cc5b9e3043708b54c1a5cf20a52889c7b761fdaf1f98fe1a1000072f6797fffffffff"}, + 
{"0x7dd6082cd09a322f6a993378ddbf030e107849ad95ef7bbdbc611d64e38ea7c4e9cea46c7d00013291440000000002"}, + {"0x1f75820b34268c838ac8d9803d05ca3f43c5122b2366f9c76b7f1f7530b7fefd221e864e5f90000bf9aca8000000002"}, + {"0x370da3939b4375e4951f17f8cf6e6ae3384eadf7e2e1ec1c50c0af4b69009cfd4c4f87d31e68000861f8dc000000001"}, + }, + Den: [][]string{ + {"0x3eeb0416684d19053cb5d240ed107a284059eb647102326980dc360d0a49d7fce97f76a822c00009948a1fffffffff9"}, + {"0xec6df05fc67d8d2b23981c78eae5e092ab11046eab9312fead5ecafa4e3000072f6798000000000c"}, + {"0x7636f82fe33ec69591cc0e3c7572f0495588823755c9897f56af657d2718000397b3cc000000000c"}, + }, + }, + }, + }, } func init() { From 9ebd1c2441a33af559fb12b74c0ec39523666760 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Thu, 17 Mar 2022 15:46:43 +0100 Subject: [PATCH 26/29] feat: SSWU for BW6-756 (outter to GT-strong) --- ecc/bw6-756/sswu_g1.go | 334 ++++++++++++++++++++++ ecc/bw6-756/sswu_g1_test.go | 157 +++++++++++ ecc/bw6-756/sswu_g2.go | 406 +++++++++++++++++++++++++++ ecc/bw6-756/sswu_g2_test.go | 146 ++++++++++ internal/generator/config/bw6-756.go | 136 +++++++++ 5 files changed, 1179 insertions(+) create mode 100644 ecc/bw6-756/sswu_g1.go create mode 100644 ecc/bw6-756/sswu_g1_test.go create mode 100644 ecc/bw6-756/sswu_g2.go create mode 100644 ecc/bw6-756/sswu_g2_test.go diff --git a/ecc/bw6-756/sswu_g1.go b/ecc/bw6-756/sswu_g1.go new file mode 100644 index 000000000..10e062cad --- /dev/null +++ b/ecc/bw6-756/sswu_g1.go @@ -0,0 +1,334 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +//Note: This only works for simple extensions + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "math/big" +) + +func g1IsogenyXNumerator(dst *fp.Element, x *fp.Element) { + g1EvalPolynomial(dst, + false, + []fp.Element{ + {15061764643505403874, 13688922241935764410, 13945096449919150208, 8436067060318510061, 4337050741185236015, 1993211805393091759, 13630591835847640353, 16993584682990519360, 8971204976640521592, 11479362202198843551, 7505613342680806802, 2623277435220162}, + {2256652953469429710, 7986151259564652590, 7173717765514014760, 5325211358772542105, 11720722258571317741, 1827380411336239380, 2945628459421726487, 15684476260997112721, 16275963394068506199, 3647627339057733797, 6840443943923060230, 2788437309956548}, + {18446744073709550557, 14704964401664098303, 11309681711681649385, 14698000069680550214, 13454959400917254747, 10801466177653926084, 15239716878514852502, 16581223703780321416, 14202840701532685566, 17948097119401515826, 5922093086188465715, 2268644466718328}, + }, + x) +} + +func g1IsogenyXDenominator(dst *fp.Element, x *fp.Element) { + g1EvalPolynomial(dst, + true, + []fp.Element{ + {9026611813877718838, 6035453010921316536, 17062904658006647585, 11643627583491349159, 14230214847810026090, 5797047097534633924, 14807366920250713864, 14779814887555727504, 10347853169840149009, 1246596934729393539, 13141619286505907834, 2448522848965627}, + }, + x) +} + +func g1IsogenyYNumerator(dst *fp.Element, x *fp.Element, y *fp.Element) { + 
var _dst fp.Element + g1EvalPolynomial(&_dst, + false, + []fp.Element{ + {7417, 4223785464154554368, 17883137121165483676, 8662155583981545100, 8014120713948809746, 1201662146881371066, 16399484201235277962, 16741102533822300073, 10092115902138952610, 11731609449449782212, 5432405448762957777, 1529941514692833}, + {17685820612365145165, 10133590496207434412, 4439341389815863358, 16237166311366871531, 12586675296657379027, 5752817071182370007, 18238899548945746049, 5474128448956424977, 4657871854268244383, 3092250989713984389, 13902867206193867696, 2147032802810159}, + {3384979430204144565, 11979226889346978885, 1537204611416246332, 17211189075013588966, 17581083387856976611, 11964442653859134878, 13641814725987365538, 14303342354640893273, 15190573054247983491, 5471441008586600696, 10260665915884590345, 4182655964934822}, + {18446744073709551087, 18441456226093760511, 13174518475280565460, 9763304497739979922, 14890648247077438592, 5778851725779543941, 6863645168616474272, 15668448372570953649, 6955362397092648018, 12310026665076143326, 11127771683818204033, 3310628831074305}, + }, + x) + + dst.Mul(&_dst, y) +} + +func g1IsogenyYDenominator(dst *fp.Element, x *fp.Element) { + g1EvalPolynomial(dst, + true, + []fp.Element{ + {33905, 17146720447904350208, 4439688729395715465, 17578719130422492408, 13009726419073394047, 17697253371943596573, 3126642749599797879, 15395188361529500510, 1711728968787230262, 252749604653387985, 14374699731745910598, 1397801387328302}, + {17266182735847260650, 2756724634261996626, 1058985484926618486, 5542736661445693400, 1636838704864690045, 12564472000181253073, 6593938171842885778, 4786584001734410359, 1113513889954172149, 12176975388293072748, 6351201184167411439, 1553692990780230}, + {8633091367923604897, 14375155055950078505, 17702614661430909603, 11655529750965086232, 7917563376482904218, 16634904018698739975, 9041039154614942318, 11137027547596045016, 12888931343158284942, 15514578667146961408, 4644663504360229534, 
2992955351466600}, + }, + x) +} + +func g1Isogeny(p *G1Affine) { + + den := make([]fp.Element, 2) + + g1IsogenyYDenominator(&den[1], &p.X) + g1IsogenyXDenominator(&den[0], &p.X) + + g1IsogenyYNumerator(&p.Y, &p.X, &p.Y) + g1IsogenyXNumerator(&p.X, &p.X) + + den = fp.BatchInvert(den) + + p.X.Mul(&p.X, &den[0]) + p.Y.Mul(&p.Y, &den[1]) +} + +// g1SqrtRatio computes the square root of u/v and returns 0 iff u/v was indeed a quadratic residue +// if not, we get sqrt(Z * u / v). Recall that Z is non-residue +// The main idea is that since the computation of the square root involves taking large powers of u/v, the inversion of v can be avoided +func g1SqrtRatio(z *fp.Element, u *fp.Element, v *fp.Element) uint64 { + + // Taken from https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/ F.2.1.1. for any field + + tv1 := fp.Element{17302715199413996045, 15077845457253267709, 8842885729139027579, 12189878420705505575, 12380986790262239346, 585111498723936856, 4947215576903759546, 1186632482028566920, 14543050817583235372, 5644943604719368358, 9440830989708189862, 1039766423535362} //tv1 = c6 + + var tv2, tv3, tv4, tv5 fp.Element + var exp big.Int + // c4 = 4835703278458516698824703 = 2^82 - 1 + // q is odd so c1 is at least 1. 
+ exp.SetBytes([]byte{3, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}) + + tv2.Exp(*v, &exp) + tv3.Mul(&tv2, &tv2) + tv3.Mul(&tv3, v) + + // line 5 + tv5.Mul(u, &tv3) + + // c3 = 37877157660731232732990269576663233239936484746509109593426423261538632780449313352717366389444912082695314931794809746268936574949192324351273838279701014606648452884726586254167471840902479876056412368 + exp.SetBytes([]byte{1, 238, 213, 183, 107, 119, 49, 92, 85, 130, 79, 195, 198, 173, 25, 235, 146, 241, 154, 95, 88, 89, 209, 63, 126, 70, 68, 40, 170, 44, 116, 217, 152, 213, 206, 120, 133, 72, 219, 61, 96, 89, 2, 93, 64, 159, 85, 65, 79, 214, 57, 103, 160, 220, 200, 220, 82, 89, 162, 189, 182, 200, 212, 168, 96, 85, 71, 132, 177, 188, 251, 218, 22, 208, 189, 13, 10, 73, 216, 6, 120, 252, 199, 240, 208}) + tv5.Exp(tv5, &exp) + tv5.Mul(&tv5, &tv2) + tv2.Mul(&tv5, v) + tv3.Mul(&tv5, u) + + // line 10 + tv4.Mul(&tv3, &tv2) + + // c5 = 2417851639229258349412352 + exp.SetBytes([]byte{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + tv5.Exp(tv4, &exp) + + isQNr := g1NotOne(&tv5) + + tv2.Mul(&tv3, &fp.Element{13990906742184113945, 15879050380504523621, 13768460034940508157, 12337541071329853620, 6296858130192020747, 9289986178217863086, 18403114759403589657, 4546259071787184045, 5504643400205978814, 13830311104669138548, 96107744534255859, 1024735223965534}) + tv5.Mul(&tv4, &tv1) + + // line 15 + + tv3.Select(int(isQNr), &tv3, &tv2) + tv4.Select(int(isQNr), &tv4, &tv5) + + exp.Lsh(big.NewInt(1), 82-2) + + for i := 82; i >= 2; i-- { + //line 20 + tv5.Exp(tv4, &exp) + nE1 := g1NotOne(&tv5) + + tv2.Mul(&tv3, &tv1) + tv1.Mul(&tv1, &tv1) + tv5.Mul(&tv4, &tv1) + + tv3.Select(int(nE1), &tv3, &tv2) + tv4.Select(int(nE1), &tv4, &tv5) + + exp.Rsh(&exp, 1) + } + + *z = tv3 + return isQNr +} + +/* +// g1SetZ sets z to [11]. 
+func g1SetZ(z *fp.Element) { + z.Set( &fp.Element { 18446744073709504998, 11529623972028612607, 739483395258014634, 5527028560780200701, 11477868704616895891, 15905434021829949368, 2844651761892435780, 17567410508478669002, 4162242322955979641, 15743938111024983262, 11916654042695069468, 4062866236140222 } ) +}*/ + +// g1MulByZ multiplies x by [11] and stores the result in z +func g1MulByZ(z *fp.Element, x *fp.Element) { + + res := *x + + res.Double(&res) + + res.Double(&res) + + res.Add(&res, x) + + res.Double(&res) + + res.Add(&res, x) + + *z = res +} + +// From https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/ Pg 80 +func g1SswuMap(u *fp.Element) G1Affine { + + var tv1 fp.Element + tv1.Square(u) + + //mul tv1 by Z + g1MulByZ(&tv1, &tv1) + + var tv2 fp.Element + tv2.Square(&tv1) + tv2.Add(&tv2, &tv1) + + var tv3 fp.Element + //Standard doc line 5 + var tv4 fp.Element + tv4.SetOne() + tv3.Add(&tv2, &tv4) + tv3.Mul(&tv3, &fp.Element{18446744073709458379, 881299893533802495, 4886355625346099349, 6225448195760991771, 6629400315996169345, 12607886696045185322, 7201730065066775519, 1932403901886200506, 8616600553259348813, 6369175937589644082, 7499857803942196586, 3773119276850162}) + + tv2NZero := g1NotZero(&tv2) + + // tv4 = Z + tv4 = fp.Element{18446744073709504998, 11529623972028612607, 739483395258014634, 5527028560780200701, 11477868704616895891, 15905434021829949368, 2844651761892435780, 17567410508478669002, 4162242322955979641, 15743938111024983262, 11916654042695069468, 4062866236140222} + + tv2.Neg(&tv2) + tv4.Select(int(tv2NZero), &tv4, &tv2) + tv2 = fp.Element{6087387690755251612, 7643068232434215576, 6195945763281467660, 97569654519975969, 1505434147110560758, 12342644747290341982, 14059794106692380317, 15229664573794943703, 16908793757593141664, 1949816925291208189, 9451095697369482684, 234190359239853} + tv4.Mul(&tv4, &tv2) + + tv2.Square(&tv3) + + var tv6 fp.Element + //Standard doc line 10 + tv6.Square(&tv4) + + var tv5 fp.Element + 
tv5.Mul(&tv6, &fp.Element{6087387690755251612, 7643068232434215576, 6195945763281467660, 97569654519975969, 1505434147110560758, 12342644747290341982, 14059794106692380317, 15229664573794943703, 16908793757593141664, 1949816925291208189, 9451095697369482684, 234190359239853}) + + tv2.Add(&tv2, &tv5) + tv2.Mul(&tv2, &tv3) + tv6.Mul(&tv6, &tv4) + + //Standards doc line 15 + tv5.Mul(&tv6, &fp.Element{18446744073709458379, 881299893533802495, 4886355625346099349, 6225448195760991771, 6629400315996169345, 12607886696045185322, 7201730065066775519, 1932403901886200506, 8616600553259348813, 6369175937589644082, 7499857803942196586, 3773119276850162}) + tv2.Add(&tv2, &tv5) + + var x fp.Element + x.Mul(&tv1, &tv3) + + var y1 fp.Element + gx1NSquare := g1SqrtRatio(&y1, &tv2, &tv6) + + var y fp.Element + y.Mul(&tv1, u) + + //Standards doc line 20 + y.Mul(&y, &y1) + + x.Select(int(gx1NSquare), &tv3, &x) + y.Select(int(gx1NSquare), &y1, &y) + + y1.Neg(&y) + y.Select(int(g1Sgn0(u)^g1Sgn0(&y)), &y, &y1) + + //Standards doc line 25 + x.Div(&x, &tv4) + + return G1Affine{x, y} +} + +// EncodeToCurveG1SSWU maps a fp.Element to a point on the curve using the Simplified Shallue and van de Woestijne Ulas map +//https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/#section-6.6.3 +func EncodeToCurveG1SSWU(msg, dst []byte) (G1Affine, error) { + + var res G1Affine + u, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + + res = g1SswuMap(&u[0]) + + //this is in an isogenous curve + g1Isogeny(&res) + + res.ClearCofactor(&res) + + return res, nil +} + +// HashToCurveG1SSWU hashes a byte string to the G1 curve. Usable as a random oracle. 
+// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG1SSWU(msg, dst []byte) (G1Affine, error) { + u, err := hashToFp(msg, dst, 2*1) + if err != nil { + return G1Affine{}, err + } + + Q0 := g1SswuMap(&u[0]) + Q1 := g1SswuMap(&u[1]) + + //TODO: Add in E' first, then apply isogeny + g1Isogeny(&Q0) + g1Isogeny(&Q1) + + var _Q0, _Q1 G1Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1).AddAssign(&_Q0) + + _Q1.ClearCofactor(&_Q1) + + Q1.FromJacobian(&_Q1) + return Q1, nil +} + +// g1Sgn0 is an algebraic substitute for the notion of sign in ordered fields +// Namely, every non-zero quadratic residue in a finite field of characteristic =/= 2 has exactly two square roots, one of each sign +// Taken from https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/ section 4.1 +// The sign of an element is not obviously related to that of its Montgomery form +func g1Sgn0(z *fp.Element) uint64 { + + nonMont := *z + nonMont.FromMont() + + return nonMont[0] % 2 + +} + +func g1EvalPolynomial(z *fp.Element, monic bool, coefficients []fp.Element, x *fp.Element) { + dst := coefficients[len(coefficients)-1] + + if monic { + dst.Add(&dst, x) + } + + for i := len(coefficients) - 2; i >= 0; i-- { + dst.Mul(&dst, x) + dst.Add(&dst, &coefficients[i]) + } + + z.Set(&dst) +} + +func g1NotZero(x *fp.Element) uint64 { + + return x[0] | x[1] | x[2] | x[3] | x[4] | x[5] | x[6] | x[7] | x[8] | x[9] | x[10] | x[11] + +} + +func g1NotOne(x *fp.Element) uint64 { + + var one fp.Element + return one.SetOne().NotEqual(x) + +} diff --git a/ecc/bw6-756/sswu_g1_test.go b/ecc/bw6-756/sswu_g1_test.go new file mode 100644 index 000000000..010857548 --- /dev/null +++ b/ecc/bw6-756/sswu_g1_test.go @@ -0,0 +1,157 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + "math/rand" + "testing" +) + +func TestG1SqrtRatio(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + properties := gopter.NewProperties(parameters) + gen := genCoordElemG1(t) + + properties.Property("G1SqrtRatio must square back to the right value", prop.ForAll( + func(u fp.Element, v fp.Element) bool { + + var seen fp.Element + qr := g1SqrtRatio(&seen, &u, &v) == 0 + + seen. + Square(&seen). 
+ Mul(&seen, &v) + + var ref fp.Element + if qr { + ref = u + } else { + g1MulByZ(&ref, &u) + } + + return seen.Equal(&ref) + }, gen, gen)) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func genCoordElemG1(t *testing.T) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomElem := func() fp.Element { + var a fp.Element + + if _, err := a.SetRandom(); err != nil { + t.Error(err) + } + + return a + } + a := genRandomElem() + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func g1TestMatchCoord(t *testing.T, coordName string, msg string, expectedStr string, seen *fp.Element) { + var expected fp.Element + + expected.SetString(expectedStr) + + if !expected.Equal(seen) { + t.Errorf("mismatch on \"%s\", %s:\n\texpected %s\n\tsaw %s", msg, coordName, expected.String(), seen) + } +} + +func g1TestMatch(t *testing.T, c hashTestCase, seen *G1Affine) { + g1TestMatchCoord(t, "x", c.msg, c.x, &seen.X) + g1TestMatchCoord(t, "y", c.msg, c.y, &seen.Y) +} + +func TestEncodeToCurveG1SSWU(t *testing.T) { + + for _, c := range g1EncodeToCurveSSWUVector.cases { + seen, err := EncodeToCurveG1SSWU([]byte(c.msg), g1EncodeToCurveSSWUVector.dst) + if err != nil { + t.Fatal(err) + } + g1TestMatch(t, c, &seen) + } +} + +func TestHashToCurveG1SSWU(t *testing.T) { + for _, c := range g1HashToCurveSSWUVector.cases { + seen, err := HashToCurveG1SSWU([]byte(c.msg), g1HashToCurveSSWUVector.dst) + if err != nil { + t.Fatal(err) + } + g1TestMatch(t, c, &seen) + } + t.Log(len(g1HashToCurveSSWUVector.cases), "cases verified") +} + +func BenchmarkG1EncodeToCurveSSWU(b *testing.B) { + const size = 54 + bytes := make([]byte, size) + dst := g1EncodeToCurveSSWUVector.dst + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + bytes[rand.Int()%size] = byte(rand.Int()) + + if _, err := EncodeToCurveG1SSWU(bytes, dst); err != nil { + b.Fail() + } + } +} + +func BenchmarkG1HashToCurveSSWU(b *testing.B) { + const size = 
54 + bytes := make([]byte, size) + dst := g1HashToCurveSSWUVector.dst + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + bytes[rand.Int()%size] = byte(rand.Int()) + + if _, err := HashToCurveG1SSWU(bytes, dst); err != nil { + b.Fail() + } + } +} + +type hashTestVector struct { + dst []byte + cases []hashTestCase +} + +type hashTestCase struct { + msg string + x string + y string +} + +var g1HashToCurveSSWUVector hashTestVector +var g1EncodeToCurveSSWUVector hashTestVector diff --git a/ecc/bw6-756/sswu_g2.go b/ecc/bw6-756/sswu_g2.go new file mode 100644 index 000000000..0777dc8db --- /dev/null +++ b/ecc/bw6-756/sswu_g2.go @@ -0,0 +1,406 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +//Note: This only works for simple extensions + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + "math/big" +) + +func g2IsogenyXNumerator(dst *fp.Element, x *fp.Element) { + g2EvalPolynomial(dst, + false, + []fp.Element{ + {18063257888214992592, 1393496779859630263, 8564933104033120466, 1364340956348512280, 2179459776642261738, 7246713717490156421, 11399242774977305051, 4643984571529960155, 5648608782916908274, 3095467176315578154, 3467607453741162011, 2961445036873290}, + {11658835549372914133, 12286399758052775709, 12748454159994037005, 8627322543101307557, 8488782471683565130, 3311860832147265783, 16666746629317462942, 3993283414811786302, 9484238281806419324, 9079179108706802563, 4853121176603729598, 4115220294490375}, + {9804516088925396249, 1689484976811888131, 6709027706281311158, 15148698413255486787, 4456196932928240544, 13089600824592779987, 4915748753406253244, 8824016979977565722, 5801663995596799579, 18101997615639527323, 2024314854882138752, 3982056613208256}, + {2442176974808264130, 1036024567776342463, 3553154388034084009, 5407139347352086633, 18271230407247584526, 15484355645761637943, 751188679793694145, 4984314664481512509, 610694156402706023, 4400103409344523273, 10219908803514347843, 147859773786044}, + {4103257481960472996, 3407771621173017780, 10892974841255474934, 16253724974777807377, 6376172419506182724, 15853776788744583196, 1223963103902005581, 13511207157402706386, 11263217018011554306, 4332839954383721397, 5081830234251762489, 2677604750100570}, + {16143817283423817884, 16868200401550225159, 17247373971233108250, 4471527367127116953, 3288264420010388700, 13080898939531555317, 309418061729162402, 4570902553714880875, 12501140036399581948, 16460091225458830175, 3432750371563496540, 1856425305308313}, + {6616034961392165287, 13262605314780833274, 11095673701643109564, 15659197846522771275, 6221107675939471385, 10040356795873210634, 
7142688090102529240, 5217229900361698120, 107142442904534986, 4880883056892442152, 17848994686119140771, 2209292322719356}, + {8209889887433583591, 11125531176963853968, 10429304465365845536, 9698924055228808053, 11572483637041790867, 8471024052732620731, 16171107090788573972, 17389518512515618487, 2446717027925821414, 13845569984714024499, 10358707895980715431, 1981955952337795}, + {17440571817547589032, 14821760434103399038, 3467158545118505628, 1052069839390006688, 16597307299261243349, 1514696147834692288, 1502298074058409421, 9724159673512954906, 17146431833140791833, 12568150970058967052, 4051586399753035900, 3034883876673207}, + {6885431716405590573, 11285374911458481328, 8536774051778827233, 537308629644561775, 4978081861472261373, 6895244255083366290, 12771428336377158707, 18366942941316985453, 3885842168611052213, 15733883558726650552, 1898691920676539973, 1053947946004101}, + {6438046397076616827, 8748193276523668278, 12924065944264387518, 10396411676633151501, 8884810883874904938, 11581924250560173624, 7958603540887013994, 11929384227933921591, 12058158172733608199, 13011125749180986554, 8011584745771515935, 2769359570595999}, + {8485125453864892728, 9496478782187944243, 9835406788412012827, 13148749213470338156, 14346720249036467428, 11042761504554563134, 9885278452952011517, 2699400820004207727, 6861670924206724915, 10779917832009994523, 6892501690509198304, 1120589998747771}, + {17370092359602020084, 5939626380863061634, 15363278959077316883, 17700203104572168868, 6623108653336641881, 12109660681271032827, 6997648647599827246, 7989523002331856142, 2551599959583933382, 13522924635998791246, 10964309953846145036, 2038629017267546}, + {8905023910868005352, 2644351245783489261, 12712934134422018629, 2320762562258764217, 18070651629628507397, 9497759956968634620, 1196234375306649517, 11762569576125533470, 12760978227532514829, 2100402702948591945, 14717565707450064032, 3190123599240964}, + {2321122152288065791, 8559934930487477171, 2816091705874298985, 
12816347129408254318, 13759444057793103650, 9245275761719025150, 4583905162646958584, 193512411034013027, 5373879090500334116, 14965705556666027214, 15938856173769808949, 1679542184021728}, + {3064863255627260738, 1322412353639848785, 18016733159894272674, 10335342398228990822, 1019043548854784599, 12367531741647476417, 16806389067180361238, 11090164111872500153, 8574474748101498214, 10816633183836176713, 6244604899848898765, 3568151292359233}, + {13882469591933839029, 3780726533369959129, 356529851879299843, 4882617127020139027, 7330450063362196924, 16839599917433694663, 15301540460601913522, 9954563827398335290, 1349170473954715280, 9235680880529941043, 1342314491728118367, 3440448802314722}, + {9829752897409242023, 15704098846998971558, 3953782753040335701, 13733604128466395046, 15940385395599933276, 11943859108918239708, 14026342461240088190, 5157027042114130164, 17730504431960750988, 5346615480493660819, 1155792137343476704, 317557884360030}, + }, + x) +} + +func g2IsogenyXDenominator(dst *fp.Element, x *fp.Element) { + g2EvalPolynomial(dst, + true, + []fp.Element{ + {7676806743992704411, 13899270453609261655, 12451810503333105973, 7586377905044480253, 4400073814052305663, 8772770626841823456, 13952615522127684289, 14011211693111766274, 8877781094782098197, 15891608636494308341, 15564328328945326253, 2909487247725599}, + {9453865609050903521, 16187066946506957518, 11903964155097034943, 8713536912762661644, 14536674352530403432, 9468882702901471210, 7150526403476766966, 5620694243891780279, 8606838083802511379, 15607157215838277797, 15500728334841342059, 2231399860031313}, + {16980338680259670981, 11521927360174526230, 3296865033313109705, 17336540695467956468, 16363282394107312970, 8395304954809411637, 4445751931845641868, 15986692066173048677, 10373032380811327027, 7515107668367104040, 1285158972258773404, 3381474465871678}, + {12590065689114661726, 5946446231937317556, 6252935342450242612, 3946482967954549718, 5477460783051802954, 9554038960651143508, 
14284409639302464644, 8381919432766671976, 15064388857143239334, 11198709445699977935, 1505776422996134004, 228166822329897}, + {8718455273244450707, 18257164560137771694, 63167664726716276, 10939327297441407123, 6906792779173733504, 6910059409960120006, 15613529393540514307, 9642032594321541892, 11604953731100391742, 12576353621847180703, 16967498256620051397, 3178252700562399}, + {868685242467462437, 6117772598772167454, 180017912061117271, 6682693383856096681, 14087880244443394467, 10065045297523127645, 12651808868021479222, 253494832475484277, 13284798185615450963, 9622862694522664923, 12723192387819738881, 3881837484675755}, + {16652886415926729528, 15222425659919396075, 7883409001976488877, 108334016999777799, 2139096560884051070, 9235143524812320273, 362394594825970392, 1985075437469393064, 15285298004551295011, 4694377468798011199, 14002413277245730823, 1031059376762955}, + {11607763233042652646, 15107090951981795493, 750429153195033507, 15447410638764619883, 17517491898415918026, 6872460098882921300, 15826126419240939753, 13286003994964494101, 18044211163024483308, 996549296944201754, 658907883085288864, 3183797990537945}, + {4230116250557350613, 3657094985405752339, 7217616869566592695, 1233781281998910073, 17857880953005547064, 6893178357644011918, 14339433834014725723, 15278301383782487737, 14319056202540821048, 9161061358875487159, 1721449113640690872, 4232195864691114}, + {9776057899978497849, 18295317405102606195, 499915739732625100, 9734977242896698887, 8865331344776907994, 11499915146337835230, 2528236165698619045, 12194855496531305052, 9067458767906246251, 18260802818211288667, 2059444010111999228, 1594971276166605}, + {16948142999539174921, 7749442011298877799, 8782365616033113710, 10062902276000792382, 8578445819659278006, 18441606647876159181, 12027817196043418722, 10967829751748336848, 13807613859264788489, 14146619962209840535, 237985113988347841, 3780579450083530}, + {2994714778176611012, 15395498331776333933, 11906372358806379274, 
3870528935878244793, 4010010522904287132, 9137750020304910250, 15901170280015152991, 10139251366754058601, 11005484872982035363, 8422618689683690561, 14842869397711928390, 3247505200278147}, + {101073571214872246, 18383559315189060379, 6907519649047275056, 13062498176165399437, 16983943441557859234, 18299740594093156677, 5880497128789512121, 3932944625598144568, 5690929158366038599, 7602942967413953653, 13778217666122444261, 1907343141630194}, + {2065675606533188185, 835926001041832944, 2586561621060881533, 8657377230123209903, 10089711764069753420, 3314659207458483825, 17753035059206418973, 14321321176141429822, 14378558285243871759, 12481010464609732173, 1367439684858703947, 117846213348560}, + {1753375113789138573, 5582726390451056543, 18111137772277889143, 8599937063653169974, 9539333829070716249, 10105075059414936548, 13701354987781444925, 15726824122155311967, 8194284482795973630, 17314739116151717281, 9538018392868702110, 1367705449205283}, + {9090248073906778481, 2107788472131405778, 12924810070024706928, 15005609447201957126, 961945466227986008, 8749282965734757112, 7718473509162793608, 10637582677835211604, 13790836079088289949, 4207625851985012931, 2762901280724194593, 1893895310850181}, + }, + x) +} + +func g2IsogenyYNumerator(dst *fp.Element, x *fp.Element, y *fp.Element) { + var _dst fp.Element + g2EvalPolynomial(&_dst, + false, + []fp.Element{ + {5051598811638517557, 737162936682204453, 18266277940781649426, 8297934493309658251, 14587852221828932567, 17520883813942983557, 10498215533406532450, 5832211223751606295, 3600880350437357340, 2825910535639299749, 12647163569588255163, 275061126760636}, + {12559678814741632895, 7859048468261858149, 4236488078359479360, 16841239042007152658, 5312075531742262702, 3178741308357192977, 14664365762224592012, 14895611710779307411, 17003497102161051726, 12561805960382829864, 1689612490434121511, 4339709385293787}, + {4476631763572289814, 16082167790703866320, 7394388887204004896, 3955363014434416656, 
13817310402283032879, 6647511166122297131, 14720286221254837855, 4472005002784771460, 4703528211196572387, 15590281238800563442, 15593255919960024691, 2166688893510683}, + {2503229986694465765, 8594864623388797577, 11771007318796339592, 12178353071709987741, 9395567834543143117, 1623478240275898563, 10422013481936289821, 5751815158810662048, 6129775232805856798, 16015977650288458746, 11063526906033396515, 359428695871498}, + {3172204394861266886, 7178324459329989193, 12055192196751786661, 9556747627156995614, 7108346223162691745, 12196486755584093700, 13296299274967806249, 743835019303585217, 12362670244290053814, 6976826164013006385, 6645425301774832941, 3420842202901129}, + {7103471633677248769, 10501892449765124953, 7979329109825360150, 16750428365225432440, 15586582932171624974, 14076198087256240428, 2628477998626483788, 7821198768462694495, 7466756118781704838, 13948888709477971062, 7927369204535991101, 2058102271014739}, + {5288823391484243258, 13731831771514171493, 12881614458960554030, 12148729456237475052, 8848547805942475948, 12243669284994705772, 16485291633832482603, 16058960579070538785, 17547062151649259172, 6905305693038127501, 18220013145790156749, 2972569751643637}, + {13361596835977281893, 17117667659587365983, 6339637372565868085, 15717095851481368394, 16547894374671072774, 5617618166823667001, 14514184715672603774, 7631006970098262425, 6062683165562369640, 14437434371647350980, 18414922128376530006, 2265037525519879}, + {9106851493396134797, 3871350947879893854, 11419315593021252893, 5677821281985076357, 11422846285152591355, 9656976370798298175, 11940691516517696790, 8167972214220646424, 12377408730237178633, 18329224238304815706, 5868665003522831600, 4308812217155104}, + {7457550484179549606, 17785821431645724103, 8610295788728590146, 4940001953956769495, 3943521744044194208, 4168544217828160098, 5813372651329180488, 9347943620560663705, 15479905250276510060, 14862637707067876829, 17259565500085355864, 1633018135761863}, + 
{10835542273568905129, 8092036257982094713, 14709920468851814741, 16853250056678220902, 6812464373424184914, 5889056178203244050, 17630558818321130987, 11291602263438171858, 5363215832179755730, 1406328387408005099, 17557253559133413174, 4179578472005619}, + {14855153380663865487, 13851563109375821599, 13068709803362046654, 11885935936878191666, 5065974774005451260, 2466622550050362922, 18101639232595559187, 7571004564246793426, 11590196373886920649, 4421221247140818609, 1615676370004474465, 1368767916401153}, + {3229602409663261297, 18370132806978542041, 8156118142877923158, 10568467618403222100, 4717245830849486971, 1321623212661780313, 2402246841054709322, 1497696416423214573, 478732305369471040, 4937841867770020731, 15317670803771231546, 1916834553357246}, + {5311741649814063446, 7140916177349472547, 4629497167543615722, 3247838709755899354, 9349955712890521943, 10806931643455376261, 9797786755886426509, 8936361737713300483, 4680758664059980220, 12406007169225434383, 3533709596282820669, 2455674357456026}, + {12488981058804589629, 12457922297183532626, 451262455699675331, 2589192099576276852, 15580980055328892440, 18403052471820082137, 12189972068214284937, 13256129327810144998, 3772655204155038395, 16041014234959313151, 72181953470569885, 4281388365540118}, + {14355041116718584904, 6518908914215076528, 103136633313959269, 11248008213672993918, 6970489461061342522, 11790861983726804408, 13550185278440680395, 13812178300427159463, 16743587002127429862, 4048335839682807858, 14698087464497806541, 2419921007991860}, + {5234990458074203996, 13017287354230506146, 14528142910692005959, 3772795329349177430, 5743699393195056243, 7634427128601001554, 13604641011547791987, 135487290822320499, 3790885956722941871, 11074651945233114222, 18082432112342758574, 574821069767001}, + {52578721445132518, 4807297948059579044, 13300282182261617411, 1966048695126804313, 9698133404944599207, 8820248505019030680, 3493492344229743544, 2207289649260572501, 10632810226164047214, 
4971453237616087970, 913429712705966969, 1297201987563834}, + {17825584147787228187, 9643369156770337109, 4780283242127859977, 17498229400171506032, 16108828831928294847, 2084860329621159147, 18414157538260388952, 800287692256631753, 5695704543389678503, 4041482112338895649, 11390130574267716400, 2960465040246311}, + {6300502775430373558, 8369258902650415287, 3125753630970140323, 18204543611164718383, 526661945080208710, 15161036982699244642, 17578969572176151165, 12378105092679056730, 18183133059714169258, 12446959345931710099, 15917824696898340511, 2833318602758342}, + {13591805356567102552, 7114002070737981430, 15792963314576163910, 2439908364129503908, 287177262616586422, 15991174737764895668, 6785081176756496415, 4252801190810180185, 15974406619965461794, 5898178132521952779, 16735071778182207400, 3747243900171232}, + {15813641761354694888, 5775215055800921627, 6840760130944862528, 6877362349591737745, 15249413449075939135, 4616164489636862746, 10144566619146579982, 13413064070702175791, 1351336168301780689, 13223512313899745023, 2505242198188713520, 4008932646590333}, + {12443271823023628293, 10023560508435423279, 223996864810707239, 9381771480330342249, 6071616917663934748, 8368415673838482178, 9943888360412422135, 16466687579138830466, 13092709466108236949, 9672861101133407978, 10061835049751520054, 1548449159771126}, + {682372491237998985, 11642050129783406054, 11148560572438111964, 14522736662327934823, 11397999623021472703, 1908450234014681122, 7015571940646005434, 12379468932656542568, 16774893396095877790, 6713564321785078684, 16841673995724381835, 2735911680003515}, + {13599451869524933025, 12884519722266096890, 17760075748912444940, 7988627628311688106, 14881807801342021418, 13083553921104407380, 14041750000390895271, 2472546647851334075, 3041343147406232878, 12920007657510457765, 995078962479205134, 2579040578744874}, + }, + x) + + dst.Mul(&_dst, y) +} + +func g2IsogenyYDenominator(dst *fp.Element, x *fp.Element) { + g2EvalPolynomial(dst, + true, + 
[]fp.Element{ + {1334792841914619146, 679589714283882072, 17083068605088219648, 4813351246215290848, 18172042438030301686, 14277203889184557608, 15837808323580910883, 10306580648728757893, 11003641200468979478, 940386280777395627, 3499226008293819953, 2670356276894702}, + {9755707125415732584, 11547150959279383035, 10562244305064472457, 10066300461986435643, 7148237540886167626, 14216704987748624229, 16406871087277952232, 12806845016466455100, 2351888649504415376, 4581239853380640174, 16076162830476753346, 568274864174746}, + {893765329958948899, 6025675675869057371, 17671761618169526031, 290857361517030098, 10896841519848110098, 17285132374146627243, 6578246538169581645, 14698668347892092920, 11847506233132912482, 13196346148126584369, 8267604772654803804, 1874938974390476}, + {6735883464220501378, 12278479419325863497, 14385176350002363003, 327505777929575292, 12879051046191573898, 5244035810968697501, 3180567461135135676, 17276308001343046319, 3006667409138758213, 15967849879716326079, 16187453789268986652, 605590141583629}, + {15477429905846444566, 874980550924067157, 812459596667087749, 15729930408665072903, 14188396940358881836, 8936483967067860411, 11560783477954519230, 3137560215625292057, 9575790530359643406, 17927687113810822901, 2632829296588147564, 435712685367393}, + {17106890255742260517, 15652864607222471998, 10170811537593627639, 1453924004107446790, 11329170583151456244, 5141789719850306806, 16125025042683025570, 15076092565634391873, 8561206140914607069, 4463443131371537634, 11670432227453805701, 1689349098401896}, + {9129230268637416312, 16131424300862894054, 11599455708249086086, 13712061106571700271, 3222547574018203825, 5028201422192587329, 12665893808043057006, 7910268944325741984, 13694452156601909132, 7594944072090734412, 6879517203248637902, 1138400281829526}, + {535993526973048097, 3775829409162107138, 13717328028075297397, 931342578972386679, 9677314225136962692, 1800755985773403875, 10333191064113944512, 493249530510038265, 
3476098351926766495, 1181720279204825908, 12777038111429138841, 3384692890585801}, + {6772381476956843695, 1547184579308225297, 9184478537476982368, 18151871132032417756, 11438493594152051480, 13120718087287853722, 906650696512912608, 13671423743799524436, 5153011690436366245, 3698430439766668188, 15092448703784862617, 3030866256020577}, + {8895000324450291201, 15634640715766263880, 7490240763823358372, 3817243160942000768, 6890890435018787185, 9346708229944942004, 18064456205166879593, 12575272864892718868, 15621790773858233563, 14813107721424927109, 8430498945985935887, 867090342578861}, + {12739036589867187355, 9795881745418909683, 5361817503043430525, 7798704069278097907, 18096088569918482706, 2937445070828306418, 9991593439280666445, 15110215917620419457, 921826231237854254, 5913792261041157634, 18090796787901390905, 2793222464357693}, + {3438032613460386180, 6204279332812840808, 14376358175686985834, 18381747228820135059, 14447820798834719386, 16106544860316335529, 14025545057246822816, 2539847764773437436, 4552716052497346485, 5833901919148093744, 13521223476405376994, 2797828028642414}, + {4675764524655489549, 7417596943369946677, 9595278207029085025, 5342154649589543352, 18404482716153763072, 2184348815728369995, 14066858184312939843, 16474267930709347218, 2362065103222405087, 3078344645281881403, 13469353821803478908, 304023594704714}, + {13336540334000055874, 12363933549556690779, 9868198113486195987, 17200559107951779810, 10375132447814441523, 16052049732850218957, 8448578710116104683, 271552525374008518, 8581766105752285893, 6711806820578914344, 14469754450868659033, 1966815926308711}, + {5101880310190341644, 15159071985462456973, 7149774646728826197, 12576407457185946487, 17860866655468525238, 4507443374425691963, 17618967833350108716, 7421484418298085264, 6158022389899675973, 108715159554595854, 9388611835741856695, 3474499278497195}, + {15767622754071481373, 6666535779016477106, 4339596343074584784, 4593977422804012446, 8260949087674697005, 
2604766566145864187, 10520452650074955497, 9902394558193624589, 12775751068121307426, 10756048839832512283, 7768310516012640156, 1089103621897159}, + {5654788162836550163, 13749357347467323269, 13974653046468144914, 15362323059394904107, 10078076543768098232, 11650909370168368583, 12022622616989434446, 16634402826677993967, 6252490137121470998, 10291036775792765672, 13911313412251103552, 1186595065825944}, + {9121325598321594489, 4133870066251362254, 8684200896151736292, 11861689787908128953, 15931378051893636612, 12451213672520635731, 12914385543399447420, 8572569051941496555, 2726874492944369081, 10801779996113951741, 5864104807527920738, 1982990352964359}, + {11090133365261209413, 15814955091366535667, 18339652705270534313, 12470411874283941774, 7241404949791996603, 8825390212707063972, 9965333245992945637, 8672796411653931019, 1540315761125104703, 12195021924674475366, 18070568062581496335, 3519904720183927}, + {1301757369757148520, 2291752182431491104, 18031597538102297383, 1002578022922400175, 17456614351673438697, 13042781829346793120, 15050871415347786395, 14032535732147643747, 1819269103672142070, 5052963464011981037, 3935730851297570856, 3098099795194852}, + {1598327534129828645, 1100792835020160421, 14836203351660379192, 14908939120570101326, 16000114216335714301, 3466300856246268575, 3868982654067330071, 9459325155447144247, 16298126741239179846, 4600431113223808736, 16925178278642030656, 2162687411385134}, + {17788148994062165922, 7922294369928924429, 14353729795536090711, 11780816328988316738, 4791479909649638698, 4114510181199450025, 1665112860230268902, 9464476004182348196, 10513922006780843432, 7647020907782691752, 907039416571889407, 2600070448243251}, + {18279740539772034858, 13488091108525447666, 7632730442377627346, 14964872363027481484, 10761640429414937302, 12381919936976899683, 11478319527418241156, 9634723465607073757, 6359127698243469512, 2747481342146307926, 7704083516220795600, 1774160264476624}, + {13635372110860167721, 
1296080719790173115, 11867537485597319624, 10870737671048455066, 11726493726432719410, 12745805811649554768, 12333923534385142391, 17801909532926800273, 11608940035451353880, 2975460672602133984, 5200998817217096522, 664536368560130}, + }, + x) +} + +func g2Isogeny(p *G2Affine) { + + den := make([]fp.Element, 2) + + g2IsogenyYDenominator(&den[1], &p.X) + g2IsogenyXDenominator(&den[0], &p.X) + + g2IsogenyYNumerator(&p.Y, &p.X, &p.Y) + g2IsogenyXNumerator(&p.X, &p.X) + + den = fp.BatchInvert(den) + + p.X.Mul(&p.X, &den[0]) + p.Y.Mul(&p.Y, &den[1]) +} + +// g2SqrtRatio computes the square root of u/v and returns 0 iff u/v was indeed a quadratic residue +// if not, we get sqrt(Z * u / v). Recall that Z is non-residue +// The main idea is that since the computation of the square root involves taking large powers of u/v, the inversion of v can be avoided +func g2SqrtRatio(z *fp.Element, u *fp.Element, v *fp.Element) uint64 { + + // Taken from https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/ F.2.1.1. for any field + + tv1 := fp.Element{17302715199413996045, 15077845457253267709, 8842885729139027579, 12189878420705505575, 12380986790262239346, 585111498723936856, 4947215576903759546, 1186632482028566920, 14543050817583235372, 5644943604719368358, 9440830989708189862, 1039766423535362} //tv1 = c6 + + var tv2, tv3, tv4, tv5 fp.Element + var exp big.Int + // c4 = 4835703278458516698824703 = 2^82 - 1 + // q is odd so c1 is at least 1. 
+ exp.SetBytes([]byte{3, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}) + + tv2.Exp(*v, &exp) + tv3.Mul(&tv2, &tv2) + tv3.Mul(&tv3, v) + + // line 5 + tv5.Mul(u, &tv3) + + // c3 = 37877157660731232732990269576663233239936484746509109593426423261538632780449313352717366389444912082695314931794809746268936574949192324351273838279701014606648452884726586254167471840902479876056412368 + exp.SetBytes([]byte{1, 238, 213, 183, 107, 119, 49, 92, 85, 130, 79, 195, 198, 173, 25, 235, 146, 241, 154, 95, 88, 89, 209, 63, 126, 70, 68, 40, 170, 44, 116, 217, 152, 213, 206, 120, 133, 72, 219, 61, 96, 89, 2, 93, 64, 159, 85, 65, 79, 214, 57, 103, 160, 220, 200, 220, 82, 89, 162, 189, 182, 200, 212, 168, 96, 85, 71, 132, 177, 188, 251, 218, 22, 208, 189, 13, 10, 73, 216, 6, 120, 252, 199, 240, 208}) + tv5.Exp(tv5, &exp) + tv5.Mul(&tv5, &tv2) + tv2.Mul(&tv5, v) + tv3.Mul(&tv5, u) + + // line 10 + tv4.Mul(&tv3, &tv2) + + // c5 = 2417851639229258349412352 + exp.SetBytes([]byte{2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}) + tv5.Exp(tv4, &exp) + + isQNr := g2NotOne(&tv5) + + tv2.Mul(&tv3, &fp.Element{13990906742184113945, 15879050380504523621, 13768460034940508157, 12337541071329853620, 6296858130192020747, 9289986178217863086, 18403114759403589657, 4546259071787184045, 5504643400205978814, 13830311104669138548, 96107744534255859, 1024735223965534}) + tv5.Mul(&tv4, &tv1) + + // line 15 + + tv3.Select(int(isQNr), &tv3, &tv2) + tv4.Select(int(isQNr), &tv4, &tv5) + + exp.Lsh(big.NewInt(1), 82-2) + + for i := 82; i >= 2; i-- { + //line 20 + tv5.Exp(tv4, &exp) + nE1 := g2NotOne(&tv5) + + tv2.Mul(&tv3, &tv1) + tv1.Mul(&tv1, &tv1) + tv5.Mul(&tv4, &tv1) + + tv3.Select(int(nE1), &tv3, &tv2) + tv4.Select(int(nE1), &tv4, &tv5) + + exp.Rsh(&exp, 1) + } + + *z = tv3 + return isQNr +} + +/* +// g2SetZ sets z to [11]. 
+func g2SetZ(z *fp.Element) { + z.Set( &fp.Element { 18446744073709504998, 11529623972028612607, 739483395258014634, 5527028560780200701, 11477868704616895891, 15905434021829949368, 2844651761892435780, 17567410508478669002, 4162242322955979641, 15743938111024983262, 11916654042695069468, 4062866236140222 } ) +}*/ + +// g2MulByZ multiplies x by [11] and stores the result in z +func g2MulByZ(z *fp.Element, x *fp.Element) { + + res := *x + + res.Double(&res) + + res.Double(&res) + + res.Add(&res, x) + + res.Double(&res) + + res.Add(&res, x) + + *z = res +} + +// From https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/ Pg 80 +func g2SswuMap(u *fp.Element) G2Affine { + + var tv1 fp.Element + tv1.Square(u) + + //mul tv1 by Z + g2MulByZ(&tv1, &tv1) + + var tv2 fp.Element + tv2.Square(&tv1) + tv2.Add(&tv2, &tv1) + + var tv3 fp.Element + //Standard doc line 5 + var tv4 fp.Element + tv4.SetOne() + tv3.Add(&tv2, &tv4) + tv3.Mul(&tv3, &fp.Element{3597427888115195847, 8485485194496420669, 9451115945982544412, 10217463679676360079, 3023875305953960937, 5866766270380139867, 15059909646037855295, 1065687373540957157, 12978541562777068958, 18112033168403904062, 11632286302244735111, 1469792042332206}) + + tv2NZero := g2NotZero(&tv2) + + // tv4 = Z + tv4 = fp.Element{18446744073709504998, 11529623972028612607, 739483395258014634, 5527028560780200701, 11477868704616895891, 15905434021829949368, 2844651761892435780, 17567410508478669002, 4162242322955979641, 15743938111024983262, 11916654042695069468, 4062866236140222} + + tv2.Neg(&tv2) + tv4.Select(int(tv2NZero), &tv4, &tv2) + tv2 = fp.Element{11188695195863236139, 18339800635248689929, 13644954250665578253, 16122525194076552550, 1985822167495960177, 11021218035968661748, 12951199075167016614, 18080500199774882647, 3065668365127963650, 1810223365641727596, 18249180996905802984, 4351293214471385} + tv4.Mul(&tv4, &tv2) + + tv2.Square(&tv3) + + var tv6 fp.Element + //Standard doc line 10 + tv6.Square(&tv4) + + var tv5 
fp.Element + tv5.Mul(&tv6, &fp.Element{11188695195863236139, 18339800635248689929, 13644954250665578253, 16122525194076552550, 1985822167495960177, 11021218035968661748, 12951199075167016614, 18080500199774882647, 3065668365127963650, 1810223365641727596, 18249180996905802984, 4351293214471385}) + + tv2.Add(&tv2, &tv5) + tv2.Mul(&tv2, &tv3) + tv6.Mul(&tv6, &tv4) + + //Standards doc line 15 + tv5.Mul(&tv6, &fp.Element{3597427888115195847, 8485485194496420669, 9451115945982544412, 10217463679676360079, 3023875305953960937, 5866766270380139867, 15059909646037855295, 1065687373540957157, 12978541562777068958, 18112033168403904062, 11632286302244735111, 1469792042332206}) + tv2.Add(&tv2, &tv5) + + var x fp.Element + x.Mul(&tv1, &tv3) + + var y1 fp.Element + gx1NSquare := g2SqrtRatio(&y1, &tv2, &tv6) + + var y fp.Element + y.Mul(&tv1, u) + + //Standards doc line 20 + y.Mul(&y, &y1) + + x.Select(int(gx1NSquare), &tv3, &x) + y.Select(int(gx1NSquare), &y1, &y) + + y1.Neg(&y) + y.Select(int(g2Sgn0(u)^g2Sgn0(&y)), &y, &y1) + + //Standards doc line 25 + x.Div(&x, &tv4) + + return G2Affine{x, y} +} + +// EncodeToCurveG2SSWU maps a fp.Element to a point on the curve using the Simplified Shallue and van de Woestijne Ulas map +//https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/13/#section-6.6.3 +func EncodeToCurveG2SSWU(msg, dst []byte) (G2Affine, error) { + + var res G2Affine + u, err := hashToFp(msg, dst, 1) + if err != nil { + return res, err + } + + res = g2SswuMap(&u[0]) + + //this is in an isogenous curve + g2Isogeny(&res) + + res.ClearCofactor(&res) + + return res, nil +} + +// HashToCurveG2SSWU hashes a byte string to the G2 curve. Usable as a random oracle. 
+// https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06#section-3 +func HashToCurveG2SSWU(msg, dst []byte) (G2Affine, error) { + u, err := hashToFp(msg, dst, 2*1) + if err != nil { + return G2Affine{}, err + } + + Q0 := g2SswuMap(&u[0]) + Q1 := g2SswuMap(&u[1]) + + //TODO: Add in E' first, then apply isogeny + g2Isogeny(&Q0) + g2Isogeny(&Q1) + + var _Q0, _Q1 G2Jac + _Q0.FromAffine(&Q0) + _Q1.FromAffine(&Q1).AddAssign(&_Q0) + + _Q1.ClearCofactor(&_Q1) + + Q1.FromJacobian(&_Q1) + return Q1, nil +} + +// g2Sgn0 is an algebraic substitute for the notion of sign in ordered fields +// Namely, every non-zero quadratic residue in a finite field of characteristic =/= 2 has exactly two square roots, one of each sign +// Taken from https://datatracker.ietf.org/doc/draft-irtf-cfrg-hash-to-curve/ section 4.1 +// The sign of an element is not obviously related to that of its Montgomery form +func g2Sgn0(z *fp.Element) uint64 { + + nonMont := *z + nonMont.FromMont() + + return nonMont[0] % 2 + +} + +func g2EvalPolynomial(z *fp.Element, monic bool, coefficients []fp.Element, x *fp.Element) { + dst := coefficients[len(coefficients)-1] + + if monic { + dst.Add(&dst, x) + } + + for i := len(coefficients) - 2; i >= 0; i-- { + dst.Mul(&dst, x) + dst.Add(&dst, &coefficients[i]) + } + + z.Set(&dst) +} + +func g2NotZero(x *fp.Element) uint64 { + + return x[0] | x[1] | x[2] | x[3] | x[4] | x[5] | x[6] | x[7] | x[8] | x[9] | x[10] | x[11] + +} + +func g2NotOne(x *fp.Element) uint64 { + + var one fp.Element + return one.SetOne().NotEqual(x) + +} diff --git a/ecc/bw6-756/sswu_g2_test.go b/ecc/bw6-756/sswu_g2_test.go new file mode 100644 index 000000000..26e8a8aec --- /dev/null +++ b/ecc/bw6-756/sswu_g2_test.go @@ -0,0 +1,146 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package bw6756 + +import ( + "github.com/consensys/gnark-crypto/ecc/bw6-756/fp" + + "github.com/leanovate/gopter" + "github.com/leanovate/gopter/prop" + "math/rand" + "testing" +) + +func TestG2SqrtRatio(t *testing.T) { + + parameters := gopter.DefaultTestParameters() + properties := gopter.NewProperties(parameters) + gen := genCoordElemG2(t) + + properties.Property("G2SqrtRatio must square back to the right value", prop.ForAll( + func(u fp.Element, v fp.Element) bool { + + var seen fp.Element + qr := g2SqrtRatio(&seen, &u, &v) == 0 + + seen. + Square(&seen). 
+ Mul(&seen, &v) + + var ref fp.Element + if qr { + ref = u + } else { + g2MulByZ(&ref, &u) + } + + return seen.Equal(&ref) + }, gen, gen)) + + properties.TestingRun(t, gopter.ConsoleReporter(false)) +} + +func genCoordElemG2(t *testing.T) gopter.Gen { + return func(genParams *gopter.GenParameters) *gopter.GenResult { + + genRandomElem := func() fp.Element { + var a fp.Element + + if _, err := a.SetRandom(); err != nil { + t.Error(err) + } + + return a + } + a := genRandomElem() + + genResult := gopter.NewGenResult(a, gopter.NoShrinker) + return genResult + } +} + +func g2TestMatchCoord(t *testing.T, coordName string, msg string, expectedStr string, seen *fp.Element) { + var expected fp.Element + + expected.SetString(expectedStr) + + if !expected.Equal(seen) { + t.Errorf("mismatch on \"%s\", %s:\n\texpected %s\n\tsaw %s", msg, coordName, expected.String(), seen) + } +} + +func g2TestMatch(t *testing.T, c hashTestCase, seen *G2Affine) { + g2TestMatchCoord(t, "x", c.msg, c.x, &seen.X) + g2TestMatchCoord(t, "y", c.msg, c.y, &seen.Y) +} + +func TestEncodeToCurveG2SSWU(t *testing.T) { + + for _, c := range g2EncodeToCurveSSWUVector.cases { + seen, err := EncodeToCurveG2SSWU([]byte(c.msg), g2EncodeToCurveSSWUVector.dst) + if err != nil { + t.Fatal(err) + } + g2TestMatch(t, c, &seen) + } +} + +func TestHashToCurveG2SSWU(t *testing.T) { + for _, c := range g2HashToCurveSSWUVector.cases { + seen, err := HashToCurveG2SSWU([]byte(c.msg), g2HashToCurveSSWUVector.dst) + if err != nil { + t.Fatal(err) + } + g2TestMatch(t, c, &seen) + } + t.Log(len(g2HashToCurveSSWUVector.cases), "cases verified") +} + +func BenchmarkG2EncodeToCurveSSWU(b *testing.B) { + const size = 54 + bytes := make([]byte, size) + dst := g2EncodeToCurveSSWUVector.dst + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + bytes[rand.Int()%size] = byte(rand.Int()) + + if _, err := EncodeToCurveG2SSWU(bytes, dst); err != nil { + b.Fail() + } + } +} + +func BenchmarkG2HashToCurveSSWU(b *testing.B) { + const size = 
54 + bytes := make([]byte, size) + dst := g2HashToCurveSSWUVector.dst + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + bytes[rand.Int()%size] = byte(rand.Int()) + + if _, err := HashToCurveG2SSWU(bytes, dst); err != nil { + b.Fail() + } + } +} + +var g2HashToCurveSSWUVector hashTestVector +var g2EncodeToCurveSSWUVector hashTestVector diff --git a/internal/generator/config/bw6-756.go b/internal/generator/config/bw6-756.go index 37a8d043f..dcbb1f24b 100644 --- a/internal/generator/config/bw6-756.go +++ b/internal/generator/config/bw6-756.go @@ -8,6 +8,7 @@ var BW6_756 = Curve{ FpModulus: "366325390957376286590726555727219947825377821289246188278797409783441745356050456327989347160777465284190855125642086860525706497928518803244008749360363712553766506755227344593404398783886857865261088226271336335268413437902849", G1: Point{ CoordType: "fp.Element", + CoordExtDegree: 1, PointName: "g1", GLV: true, CofactorCleaning: true, @@ -16,11 +17,146 @@ var BW6_756 = Curve{ }, G2: Point{ CoordType: "fp.Element", + CoordExtDegree: 1, PointName: "g2", GLV: true, CofactorCleaning: true, CRange: []int{4, 5, 8, 16}, }, + // 2-isogeny + HashE1: &HashSuite{ + A: []string{"0xf76adbb5bb98ade21e8fbe4eef81b6e9756798f2e64bff3cb65781179d6b076b17683f3cd5042655e16802b1a5b1b5b5e386e23e2731d24c843595d5c79ca4b8b9179cf10cb86e782d614a1f78930c25aeacdc30c0008fb417dfffffffff2"}, + B: []string{"0x16"}, + Z: []int{11}, + Isogeny: &Isogeny{ + XMap: RationalPolynomial{ + Num: [][]string{ + {"0xb99024c84cb2829c6f231ad6488f8ecbe73b35d177c03c95a1b5d4aff107d954726713be7dc02432d0d2d3f919a6b98b6ff46f95026f313a3f953d1776abdcc48d809cb0a92e2b7464ed6eab9312f612a13505b1d000072f6798000000000"}, + {"0x26bd1df0d7ae6c65ccce9c94a71497cd125c27c5fea6d502de92ee64cfe6161e0e05eb4aa16bb7f4c55960f84e03d6b986c05315e0dc296ea61a6ce0bfe7584381b90f97adb14083c7e63f8683ffffb35baf0000000001"}, + 
{"0xb99024c84cb282a010dde96a80e9b8571a99e3c121ae77cf5a598f3fd0abd199502d6d31fb5237042160e2f83bbff87df05586dc52cb529ee19d07248b4fbf241ffad1c2a6de71c88e46e4e3dbb1026d5ecafa4e300000000000000000001"}, + }, + Den: [][]string{ + {"0x9af477c35eb9b197333a72529c525f3449709f17fa9b540b7a4bb9933f9858783817ad2a85aedfd3156583e1380f5ae61b014c578370a5ba9869b382ff9d610e06e43e5eb6c5020f1f98fe1a0ffffecd6ebc0000000004"}, + }, + }, + YMap: RationalPolynomial{ + Num: [][]string{ + {"0xb99024c84cb282a010dde96a80e9b8571a99e3c121ae77cf5a598f3fd0abd199502d6d31fb5237042160e2f83bbff87df05586dc52cb529ee19d07248b4fbf241ffad1c2a6de71c88e46e4e3dbb1026d5ecafa4e2ffffffffffffffffffff"}, + {"0x1eed5b76b77315ce6c7800aef7b306952f8658ccae70a80831fd94f251e13a45b7ccc7290e7ae2e14ef34b51df3471733e5650ac56b2e140baada4fc20270074f385fcf816086d73d46b785d5a289f4a69c36293f7ffee097d04000000002"}, + {"0x7bb56ddaddcc5719024ebf85e3a0a46fefc545c5c0628b194a34c4ba6ac12eab1339f794cfc8e22966cea64f49ee8f4675ef712f878e58793870797ac6d90c77a7cc163e6cef3cd9dd88b97adb140df8fcc7f0d07ffff8d09868000000002"}, + {"0xd87d803f042598656902e5a6ebbb571049b389b6a74b8bc73ebdd1ca73731f32dd8a54ba4fdfeada26f108cc45b54c92edb91d566097e064073732fff7dd09aa254f4a0dc2ae2f69fb52b5b4804e82d4ee97795b380000000000000000001"}, + }, + Den: [][]string{ + {"0xf76adbb5bb98ae2ac127e1e3568cf5c978cd2fac2ce89fbf23221455163a6ccc6ae73c42a46d9eb02c812ea04faaa0a7eb1cb3d06e646e292cd15edb646a54302aa3c258de7ded0b685e868524ec033c7e63f8683fffffffffffffffffff9"}, + {"0x3a1bace94385a298b335eadefa9ee3b39b8a3ba8fdfa3f844ddc659737d9212d1508e0eff22193ef280611747505c2164a207ca0d14a3e25f927a3511fdb0465429597638489e0c5abd95f49c5ffff8d09868000000000c"}, + {"0x1d0dd674a1c2d14c599af56f7d4f71d9cdc51dd47efd1fc226ee32cb9bec90968a847077f910c9f7940308ba3a82e10b25103e5068a51f12fc93d1a88fed8232a14acbb1c244f062d5ecafa4e2ffffc684c34000000000c"}, + }, + }, + }, + }, + // 17-isogeny + HashE2: &HashSuite{ + A: 
[]string{"0x2693fe3f93094f745f2f924da1a4260675024d398700e46db4678d6c77e732927310a52e43b73de56540c9dde1f7896d115a7661f0e25e7c54754324cb2ab353f985c8bdaccf0b3ae0e6ee49e40dd2b5d8aee87356b60e17f9de343033534"}, + B: []string{"0x4577fc0edab719863d99d99fa7737b795dd19e05021cf50e2bfaadf8d3cf9670ba09ab81773af807f37f714475a819c9dd47d9d9b93ec37c70f3b857252cd99cac4151e0b42eb00271779d66c0b0c79e7f450a1fd85b66a23256679146fcb"}, + Z: []int{11}, + Isogeny: &Isogeny{ + XMap: RationalPolynomial{ + Num: [][]string{ + {"0x57b49e22a638e7ce6fd84adb98fef2ee37a078dad5e6db136e00515b7db476b3092fe24c634db1c19b32784bd06db0e1bdbcc194bd2ea84b5485485423516adaf53426ca54ef27fe07de0370f3473b05fab20bba48a8ceeb8f39138734a79"}, + {"0x4eb16246adf77c244d7c96da07c828728ec8cf4f73b36f9d3b8638cd7d5f00932de546ce6d666b6c1c78a99b824336da2aa7cf8017843619aba11c4e20eed632ffbd4ed04676972f223e842b7d9f408585f879993e979cefce07f6b320f98"}, + {"0xb0fe2b7d5cc6757f09eb475d4ec4eb2892eec65ebb4d0b73dffe388dc8b625ae1094586e7d5f12d55eae19f405323e54346d6e75fe5e30ed86a66f3bd956f2e0cf3b68afc427260f3782828672bc312081de7f76b025cb18780f79ccbb931"}, + {"0x63ae87b32561b93fb20fe607e846b376b2db30eb8774a0c741bc2c2525053672f753b6ea1b420fc0817c91332c754ee91bbdf25a5e5fb852314dc28a26aef1a6fc6bae25a0285b8355a97b597283baeb152f6a0ff281a7a6029551336d19a"}, + {"0x8e95e735b9b8a53ec343d0960e4ee6f8d38aaadd78fb413533fb771a3be16d1dfb91d669d0fdf927aa93c60a147c23c6b26b66cab157b3b1381e64646aae66dfc53e2698852470de8fb92e17b4504bc43249a9a2e7330e3b5e25da416f46f"}, + {"0x5f0f6d5f0ff7ce8777840d1c4d25d85f9a63f04df2479c5b73f1a95a3caf20143e0bf4e9412a9d9d63de7b03be54cfe59522fa2405cd2b02340479f9c8776d6ea9fca2dea8d6d71f455162a6e9f40b76311257170313a8682db61095c7bb5"}, + {"0xce64c0bb237310b05120e54a6ce7ca932dc2d6411b9761463cf0c31e07d4caf5bac19db735631219b68c1dd00e79f19e2653d050137a01218f5b1371d9a276a8938dfd36a0565662a1c60e970b0ba6ca1e330ca446d4863f5c44cf57dfe70"}, + 
{"0x5c79bf1abe91cef7276af031994044074bd33da0fdde5d1356095b3d02fa2d46345e935838131a84e7408ef41afb492c5a5cf8256daa4be842a0c4e9056f1f3eae94b3d47a78956718cec82b21f3e51338e5e2dea165bb52ed3c3fef55e2 "}, + {"0x528d835a2e6a30781df335a708c54aeb2b71dea9a27564944950b4eb0d8298a44b86f039ac8815883395a340deb72664ce9ef1d01799fe56b8141ffa816ff7ce020b37944db6c438b78a9e78e96ccbc63cff0f7f7e9366ee17fcce48ded23"}, + {"0x778a6ab417356eb65d79c0a0a1cf6d67f3de72168b1ac6c13f3f619fbb33e1da96b6b4b7f07ab0d82d1702006321158b231b405cfbffe44d033ba590dd218224ea7c800dfa1f00dfc41bc8d4f7d6ae966107bf5589f8ba66cfa90c23216c3"}, + {"0x4147349d36fc97c1ff5e168c56d665d0b6f1a8a048eb903e134490eac3b6d62e9c4741b77e0290a585bfa7a1c0dc29fe0b8000728ae4d7f01a849fde08e5c268914f0dee8ed42612e1531f958cf84126358cd261871bded7381b133541604"}, + {"0xc471c9e9bbb09990d5e6c9f5017fe1382bcb0835464ce01b69391ec611babbeca074b376ad1d5ee9c49a2214a89824f408b8eb1dbf80b9851997a2d7f4fc288fb2ee7c3c4fa9d52283e977c924af4c360c5332fce61c340d6af7edef6cfff"}, + {"0x44d18369bb41e08dea957a700307d9929a58b5565bb4fc9b985b6e98047cf473b636f6afa8ddf5cbf053e02477a616339eaef3bdf402e782850a2dc76555e82e8e803f2aa39f76249929725c6ca5d292d4b10aee3535e3c9d9368f01216da"}, + {"0xd46505c4491681e80a79fca2f3899ddefc5c765972f1ff4f4d20c6b8b89b0be5decd3e700f31ca2a0a24759dab83fd694a850902b3e14d656e4864b6fc2bf1f4aac08ed4fd147884840cb0cea7f94b0f923fb94ce65f77b5992c71e1e5038"}, + {"0x6f9632f9f15470710c2004132ef3ad5f5600087a3195f06fa2a1e69c0aea80a94e43c427cd681cd23040668e3aef649d09455fa213ab6364c58327073c8c798963672d092a60943e08d17f05771a42e6abc0d30ef3f1ec16195ff2e6d455d"}, + {"0x9a51e637dd926867d26d4fea699ddbf56041fb4767c71b5cef365a2a944cb79879168c5a93b9fde6be4d414a4016b01b70c039253548cc08b8309388a1a511ebf1dc8f8b9c436cb21aa944c9240a0c1834fc199be1153717ea4ad6f5cf86 "}, + {"0x52852129f19b369553182b651e1a006b282bec349935ab9fd2b392c7e43de42da08efb80da828e5bdb508c3a396e6e22a4f596da57126b2c654700cca80014fab35dd74dabc755ea253556e3d6a77d3d93f85b80b56fa4d4011970a59dcc4"}, + 
{"0x1c4076fbf2dfd41be9c3e3c24375f6dc8ab18fa163e8f502566471ef2432f6472cfb668c7b4d2e7753eb5181083607b70cac77be553df58c3b26f757f1c70c45474e0c72ac735c9e176ee3f4a10142ce372df6465e1e1e1e1e1e1e1e1e1e2"}, + }, + Den: [][]string{ + {"0x67ca0a05ba18be2717328e9da3d366da18fe119dbf0c150dd6e49daa6ed4f8de92e85a04bc69f045366c80877116759af4bc0c2a6f49ec158f0a2ba5e118ba6d26ac9f2821aa917978d4a606d940397818481adb5ab3f2c9670d2faac21bf"}, + {"0x461d07431e8397832b6ae980dc75548271ce6a87465b5ea5292316b8b1eb078b69e551472f41df43806e24ad887fcb1f551938ec0efc00499f7c3ba5bf7d8ec6d63a4ddd274aad64d716307d11a0ddb9d1c0f40296317c29455075bffb2ba"}, + {"0x43635d242d3fb85cfd3a36bfa1e3628afb5e1e14ae9f67f5ab735f8f6723bc48ba9c507c50e703220244876a482f00f4d7f2d022eccac8301995542df93cc151c10b4a622565832b8793de295ab227054118d673efecd6c50fea7b111606e"}, + {"0x86095d084f72a9be4c483ccc2c746e6ef6e26720fe2700df898dd5a5b20d6a832e2025d2d00a8447279691db55bea69a97e63e4acff6b5bc3254c3fa00017cb0adf4980a85af177485637c02ccc568a182ae232f54eada9391e3d98ed5ffe"}, + {"0xb017fcf26b80fc4af09015d106339becd5762ccb32eae4554d8eac08772a9335ccb7a8a916bd84caf4bcd4d5e53ad1ddf931deda6d5bc04c2df1682d31c696bf2321a2f005e77db814de79b98353ea4625962b9e30e78311ec0d9825e1f61"}, + {"0x45fb599a0f0a1dc49bb19803f84cb2e31e0c7c36dbdd187ab5bd8f024061a702d3e9dea95dc499b77fc32f4824fbaf91d82433dafbec16e1c977b20efd359e1334357a6a5b2d4b5c9452e665fc212908db0d18cfc0b5a5f3706b47c8868d1"}, + {"0x6b33147c7484eee118a499d769e8cd5067e77bae02abf17f8a47b040d39891089ffc19052ed287404aa3e3d2eb7787cd08bb0bfcc1c990a73d649cbfeadd6c4cf9ae2580101c1a1f45a90d32b0ff13e02b5836642634c7e952b6486d29f8 "}, + {"0x387e570f75f5fa085d262c1cb47c27f872beec94798787b8ea11debe75533ea5b0031eb544968899daacb97792dc67039eabcb6e64559a202f2529af243b2d7f993cf4896d9cd1af66b9cdf2753e483999115c0caa96403d360aa9841aaae"}, + 
{"0x1257e076f478ad94029dfb12ae9d928ce4550151c5a02aac92329f2ecfbdbd0a41567dc38e5fe5a0d5d3adcee3d1d84639145de0de9644244c34c031bd5d564a483ca617084e2829bd8e898733dfb99cc3cb655161d5854f687d9592c67de"}, + {"0xcb293e9849238ea68db183154dd3318b7a97d2c8b5911164a377e42468284ac8f489f97435e2ae5587efc9390ec8fe2dd55a466388491e21b477cf483333fa4f481bade0f0a80dc35c4d967dad4a5aaa1864323a8ab11f376eb6ee6440cd5"}, + {"0xecfd07101cf66ec654369b1dab1bb19588285083358b3f41fbcfa9a8667bee9f2e009a65b4588590ba71ba35bfbf3f11e0e38cfc02bf8e01475be7ce2fce4a6fcc8aaa3aae1d4696fcf86deff15d543c992e08aa4ef0822fa2da7d3dd82b9"}, + {"0x505e4b45c2fd0610b6de37cee0cb9a6e5434fe87b45190ff65418b316e4852a031583c3b5c28b4f78e5cdebbd7ac1675f1bbabb6bbd07ffad253349ed13c6a1e605856fc83288739254e877b0cca8c895a2119d0c9371fc59deded45cb1b4"}, + {"0x4356cbf741137297e9995a239f126c8e69898333c8a0891ee9ae4acb9f66cd1fb8b5c550d54b3d3516d27fc5ebb31b938d50d18a5d1fd92c4fbc246ae3a12886bab0e3b38692d26b5cc13372389c7c65df91ba04dcb040f2c8356bfb78783"}, + {"0x2d30af585d5961e587be65e1b61869b980d0a9a66510b0013800455621c06ab17bbf7cdb49df67795dba802f4a5dc215300a3a409408de7801f2f0a8e5b3587d478f94c523087fda1b7d9fce00d99ad124ae782dd5a6033e14141ae7af925"}, + {"0x89ce51dbfa7cf438f4174452a0159618713a55916e86698c00023059f53812346d2e996cd2be2ce603938b6e28add4df16fac31411400aaab469b668d9fc0efe45b04c071a2e09243a6f1c24fd2927823ba1b82803796b4c5ee8d14b6eca "}, + {"0x60380c3366f74e8a5f5447e6867e4d6b0ca3c6d01e5ad7bfaff113c053f5cada2ab35379044733a1e57cd19cf5ae16240e7bde52e621adad86a454c400381cf47f872d5b7ccf190cddc4a5477a932bd4a5e22534d30913553db82af7238e4"}, + }, + }, + YMap: RationalPolynomial{ + Num: [][]string{ + {"0x9f93a11a37f333a5052a497f2ff8959bc7b3e69bd5f470445f01d6071d144dbe992d4ea3783f0fe7d017f5dcc1e6d2980590726773e6b4bd7bc10cd999e2c38c2aabb90c484462c378b64918c1a48e51921fb837ac5feba926c3f623c4160"}, + 
{"0x577ef753befab81d08fb0ce7402dc357e7dcd98cd569530135f3ad339845cfcceb84f041cab594864b56060469cc8e0cefa322613061cf50bb7f8b3adba1caa0a522a5df2481f64cc0fbde989a13c70a8168d6cd40a30a342c186f229597 "}, + {"0xa68f14dfeb538700dd4d74482406ffae6be6396e04519ff32a9d0962b7250cd4a6a14a17fbc36032b9a79cb3f79b06d9918b4a34b8302120baf4757568bd3987d5bad8222ea01116c54237a31599acdc600517d3f733f5150930c07a9b8ac"}, + {"0x62f40704a8cef5860a7c1a0e953d8ab65ae5e0cb6e49d58da6e2501eabf797a77b043818a3bfa545a51060e24c3b7c2773412e527e1b72acf2044d95548b72836851a635e50c4c24b35c5a769b985fe464d857d16cecf2f8060374067589b"}, + {"0x1930bec080bcf38ef774f04e675193fc123f266b80e62e1b294b0978abeb31b3afcb02e33f65309399d12e3803fa052098b473eea25be9b9064c8821c0bbb90d9eba711b1dfddf00c2ae407d79382ccad4c6df7d43c160ed257138fdbd403"}, + {"0x65e5a193d3016af59101e156a7a886a3d15bfebab72190e437dcd4ee1a42a17f739c140aead0cf987e922daaf716fb7b36b61e0996fe6abaa4656b8a7f5b87f41ab28135a032b51957e955ce8efb83242cc62fe08a71dd8065c66720c000d"}, + {"0x7efada35b7158ff27d88e0ebb8775bb5e3c7f4224e20a223bb2acc78e0176802b998265a7d6473b22171bc9a57bf3943ebf43f71fd0f9006e5a7af6e6e76f11b949e7adb6b7d252bb4c36338ad539a9a7c77cea84280e7cd2964e5591d226"}, + {"0x29eb281af9b44d21986ba03590afd087e477f3cad3234825f131a31eab2adb59ed830aeab5e5156f8c5de90f81c12d8231604c9e25c4b358501edef95ed2926a79bf118f22d9c4f6455c8c7e37d618c9bc16c46c974437daf821bbd839df9"}, + {"0x47e261e740726acbe699eac8c486186ec6492f0adc2b3dc99615e874b4b7e093bcdafe1075392c49bca1ed7d4d043e2798c971e73535be10e8f4d6aa23b3bed69de7e75a8d3a8373e8ab0621582fb7cc43d2643095843d47003dccbecfc7d"}, + {"0xbd7d4e63943690a235389a6bb5d448042414a1fd9dcf42116515f6f7e5dbaf168a9830e70255aefd2bc14ca35eeb922039ab85ba718d3e6d23d86b08381e413ce18ab5c378c43675af0695ee62eb42790c5d5f89c2d0db6460678d5d7d2ed"}, + {"0x41c2f299e34019cb40e2af5a6bded90f03c9e6e0cbdd853e783f4f3d95935d39ee31fbb61d17610387b84ba9519c16881183a72c59fe7383aa5ae217b42b7480f8f058ad146edf6b4a7549cccea967ded9fd28d150274cee458208907ced2"}, + 
{"0x405da38064d76408c3adde67a954882f3c8fd36eabe3201a9dfd75629c8f4036c1d63831453109f54cfbc4cc8b9e5f663d3db51eefcefbce9b533b82e005bd9ee7af344ca3dcc40775a3fd24231ad6aa8ec93782ca5005c54856f0d85c04a"}, + {"0x5bafa78c01429499eed6e25e9349a6ca3d2e0ac1267e137174d00a6c433c6095e3d1f312fef70a5703698371ddd1c06e302261b4f6069ec4b78ad4e3dcb37b5253c0787566ba225638b446092283e42cbe85907b5ebff3b33c270a868f0c "}, + {"0x39c152b20238115dd4e6df5508cab1e2c8a7ee9eefdc292f058b6f89586654c8b3586b6f76e713ba51824ca6e2e3ac7a381a8f1746d7d5e500312a7c8513c46a5736425da606f7b9d34fb95df8e54ba0e7cdaef0079df1eced56133412cd "}, + {"0xfeac73088d841e8e555bfc8117068298c2aa3b88c0172c8339c4c507dc212472eccc77267a6946c0c4c260135b4a5f0665194fdf129bc15fc9623987abc4b4d251feb6696737b390dd48da1ee8eae6123b350c4356ab64e534709643ac84 "}, + {"0x6a9a250e9bafa906b1ee1517ad74a816a62c5706d13b5a198fbefb1d2d3dbda735841e67a0adc0ad30495670bd2b83953c65b19aa017ee928b6d5d195b3d16590edf554767224c79b07e803db87acc6a54b60d71b15eb75702f9168c5181e"}, + {"0xe1aeadc4bbd21621ff33dcea2e0ec99a6ea667741cf62af0494afde045b4ccb713836169de252f3018df60a9b42972f3ce650e5fd93a8e06e4fc281fead12a631e834d5753f175a5d76c2c3582caf46cdce6228ce2e767d59573c748ffe55"}, + {"0x7ed20aa5207c66a812970c473f270e99e5974d86d49be1e1976f669388cd568cf799f5c57d9a251960bad6c97dd3fc335eb51b4852cca361a95fc3eff218be4b9b46bfebfc552521659092156b85da783cc46a24ba5d1c1af9ddf017443b0"}, + {"0x7235fb0eb41c06b940ccf7b11788831523a2347562d30c2b147b7381f37a4a63dbae4794c8bd7d0f681bf17aa86221277496fd257af5814f73c8303e1ba09fb80ecf6627b00d453dc712e67aec504f7ece5f39d54f49f6212e792ec1b67dc"}, + {"0xba8193a4d3130aa424d39453c76c8ed262b2e2e0d07f453061b253ce779e1ba0a03f90d00a58f8d08c681204aa771512b68cd12d2dd4e7ee320514a2d5f56a1b8dd2a33c1d4d60a503cade7e0eafd341d8ac667b1c8ccee77ee263808a2c0"}, + {"0xf58592db221a5b17116ecd2bf86c7cb449dd623e49bf58ccb63baaf1cfbaae68778caa8b2f5d9b9211636f79b7ef12b9aa2155c4ef99276190af85a5121b9beccd7c747e153187c668b1846a179312b7fdccb9fd8273ffd3b058c2a97e99f"}, + 
{"0x2fb957ed355d384286b2f6153caad9da4cd75da2f960d2b0162badab4e6bb653f08a4ba1c93813fd4bdc0ed7ff19073696c16685bc73c7924f3f094e19db49cdce7365a991e22911fe27b1ddf631c5f8775210eee3de31057c27f397c5db2"}, + {"0x9a597c03b1977869ca1a58b04c6c7b5a8136d673428d8077c01fef3fcf1b7661dd2c9a0a9f258d60ac5cddbaddb91dc17e171cb4a48d32d3eaf5be484430496052e4e835781bbdb2f52aa838da2b488143dc2266adbcc6dad98b7bc422c65"}, + {"0xe19735baa44196f6aa3be94274abd8e8669acdf6e9d985820bf1adf34d66305e0a9c697ae0996b67513553a1a5b6e2c465895925873497d56cb7796a3a3fb58bea27d51180a658bcd7d81c7a86577ed8ed84b61484b70703a5be7ae16fce5"}, + {"0x3be0b32a85b89ede867e519b45827587ac1c9b3201628e5a499574e5da2edccadf907d9f793c735da5ff0fc3e60d3550653e133c3d1b467b5951ac65f8ac41b50e3a2e76024288847449f0d35d73d783c6a1c167f67a3e01c5894d10d4986"}, + }, + Den: [][]string{ + {"0x6fce5ce514cee5debaed37973056b6a3edfe727211c6e6a1fab570eed4f2a508e96d6b081ff4a1b1a8acdf79fd0bd33c663045955b8cc064b8b92b7bbfd15fec96bb04be80bae195d8d9d51c240630f776a199d13ecad6bc53f75189d614d"}, + {"0xe0962cd218121ad29e5d7490bf192c86380cb1f4aa91a0e3a86aa73be92a1c3e8a440a35f0248ee807f47a4239a421000433a638d113dca21313ab532984ee2abf3b38d1170fc51c7ae131c57a7449c65cfdd78de9c937528723d0712be77"}, + {"0x1ccc8c1978c03d019911928c1fafaafbfd603ad5590c6854d54c3ea722f18a19020e256e2d2f66418fd103b42765a5195c910f10097fe452c4ba4b5d5ce5a67ee3adde1a6b2cdf3b7721da38f676e97788cc4c64e2e7962fb81bbdc0f28ba"}, + {"0xe22f7cbf189dff65136339d090ea28fc60f120cfa26dd8433c6960b418362be2336be5e929dcc61beb4412dcb232db75be87f17b02742c2206620c090b9799e3ed9d988b69316875970f19cf14388a4fde13f176355fd16b702a250a65024"}, + {"0xe8f52882ce0a0e52ca01be901b93d21fdffae3d31b385750e422cc08cf926c7958a67728e2d26aae7ebf673a840d9d1f8aeb9e58a379f690461f8100817b8185a3e87401b650d473bea9733277bd2166d9f1d7f6fbe6711012bb354d2e636"}, + 
{"0x875fe99541a3b4273eb0e06ca68021e7b484844fe1276a3dd6c1ca17fff7bfa81a7fe367de671cbfe1498baea048a898dfd77b2d7ffbd4bbf204f862d8771bd35197f0575f6ddaf2dd8915747726e2aae3ddd49c58e47d732e7dc74aaccfa"}, + {"0x4dfd80356811a35a65acb00c46d07a392658904a4fc0cbda29046c44a833a31ed9850230c5f4ed0c1e1e76576ad104d193292821897138fdb5700b90c70041f6f5570cba9b4f4d7def07d469b4c892e87d2ec709ba01068510b911e9e8ad3"}, + {"0xa7f230aa84f2547cbe501721f030ed5bc4376f897cc22d1df86da38d73b27c3903e3e44109d24de87f66a029b5cfd5f68fa440f57de9533515f9912fc130ee39f65d7fb8da98d707cbcb268a3f32cb3b40ab7dcac9128d916a20625e28985"}, + {"0x7f35f1b0caeeffdd8711317415c54cfc5a1a04ab3cd08e6b4415b78b5760ce5dc99acd7c5a4a4c1238c478461e7b79c4f09bc4b7c9c42468fabcfdf142db1403d5b01a4cd818594dfaf729a18514e62aeb7a3733765836d2a9d893325c7a9"}, + {"0x9bce845eb9ff707815ed7ed3fb74d199d492671d29a3bf2072594b9154f530a311514634ef4fb471e883c854dde519b85f679d07aea459f7c3870ba31b1bd2c3548152cc0ce70241c24572d7eb65069b1a97f2bd155a4185ef91ea5659b13"}, + {"0xf0e88310ee09722be8558b6ee541afb4441bc1e77ae82ebcd5d792f6330980c3d36914d73138f0da01be4580c0c2188c0e2a010746b596d80755e71d07028f78bc29af82e7e5be87fb3430b1696810ea95758f3924d846cc0192a1e4f6e10"}, + {"0x8237fb8a7d102cfa45e7020363d892aa7380d1b5ee84b792c65fc2b167552c67623578c416e9922772f3ec99fddaef25907d91bbf0c72b8f0a0070a4d635a1ccdc78a8c6d4a175ed00a68a1eb0e7b90a6a5f33c6555486c77d4aed026ebaa"}, + {"0xacc8c17e87078f38e1056204eea4bd9343903c88966d0ddc2ca5166a97a99db4b66f74480eaf6030d42f8f23f00f1e7a1a6d13f086a76f04f14e98f12917de8d7998089c48bd860301813a95f708c6bd0417acadf80cb9831ee34286cfea8"}, + {"0xa9c3c892d49a5c3e506bfa242431f2b9f3934c51c40045e40478e1f4120da439ba6f0cc154e80bf1ea7ced993b7310545b6af11a6b3bbab611e648afe77297a776ef9468f51706bfbf60a100b53571a0a7361cb592a4f3000a45c013c950f"}, + {"0xeeade4edfeeeef2bc9f34296787829c99b544f11722d55db8789550c5e005ae27d651137cbdf9cb95ca030e3d84544dd21d7dca37bcab9f0e1ddc4cf633b5986746b0371648bac79367a9512396e9789031acc705cbff8c6d15d828c6bdd8"}, + 
{"0x6b6c82e8a5d0faec7af39bbaa40d86ea47acaac61910aaf01c238d8ac0f2587e2f993835b77e0312a3d23a2da61cfe7d68c91af4d0e24dfa29d5ce2c470c29c5ff6d89f060a0c0546daa1287fa3ab18a20cc1666ad84e3343d25997269773"}, + {"0xdb9102798323f938aa773da2a56ebce58aee6da13e2b10b8a56bea0398ad2d5fa0585d35f7b8b0e9076577c6e36ccf6f9d12e9018bbc8424d38620b874e24d4265bbf27fe0940ddd713e2bf225f0cac7a82c92516c75fc6d88cdbbdd58ec0"}, + {"0x3ad0edad38f3ac9e11e7c078c6e80cf48879547ea9efe567d0fcf53151b71697643a4940c375e6593b96a3370c56964ea3fc16bb574197886c0d9da0064eeb7e4121296069a4509b10f97a4b4a4943d083dbe0a221465711128d1778f332 "}, + {"0xc23a66e6ae7cebc4e55e5a301aa0bb92932e809fc326fc2a9e5ca5711241c8c77313b93e8065e8ecfeb3765a53e2b48bbbd6e27b960e76d00dd5f212f51c2381f6571a0a2d1bdb232ae1fea083b5573ebc76b53c5e8c53434a3e0ba80c3e4"}, + {"0x9dea557e9d9328f93a7a014265d3e7bb51164ef0a4eee3dd1219b918c457f7f0818521d498b13552933f9ead5b0b48d6b3db282c72ec06d7ca1721d3732f3fb59c6c655f3526db0cc833984a0f4957f312abf88133767cad4485c2f9b9af2"}, + {"0x9497a4421814871b03a96cb5d6b294880db80435d857c2fe9f4e97d75b37c795a1728121d66ce742793f98090d0c894f715a7e81dce84f594369b8bb2b9be0d482436c1ceffdc50efae8d85be760beecec788b5f1b08180dfe69567a0c102"}, + {"0x9ca415b1068cafcb60219e17f1350a1ba427a276689247978356bc51e19fbc7bde4a260c33e175dfc83ab230d17ed24c88d8de8c145213227d443192eeb2e1e4c61b7be341bd88638d5b44cedf93d445b344552c004f4b02bcc04b4c235e5"}, + {"0x2a27787477dddb1679fedf03a1e406b8df8cee954e12fd31e027d2d20f5dd6ffe6dc92a5c9ab16be8f21439232809c6187cdae5a5c6fb2f48617d22f6f2a719c5fa6125df02e551fc5c88bf21ff67de5c3cae2f8553bea742fdb7cc29c9cd"}, + {"0x9054124d1a72f5cf8efe6bd9c9bd742092f5aa382d88439f87e99da07df0b047400cfd35866acd72d83b3a6b7085213615b9cd7c5932848449f67f2600542b6ebf4ac4093b36a5934ca6f7eb37dcc1bef8d337cf3c8d9cffdc944072b5556"}, + }, + }, + }, + }, } func init() { From ff8906a92d069fe2d5d7706441fe4b1e77ee71ca Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 18 Mar 2022 10:05:49 +0100 Subject: [PATCH 27/29] fix: add bls12-378 
to signature package after change --- signature/eddsa/eddsa.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/signature/eddsa/eddsa.go b/signature/eddsa/eddsa.go index a9ea4cf75..04f1dab4c 100644 --- a/signature/eddsa/eddsa.go +++ b/signature/eddsa/eddsa.go @@ -20,6 +20,7 @@ import ( "io" eddsa_bls12377 "github.com/consensys/gnark-crypto/ecc/bls12-377/twistededwards/eddsa" + eddsa_bls12378 "github.com/consensys/gnark-crypto/ecc/bls12-378/twistededwards/eddsa" eddsa_bls12381_bandersnatch "github.com/consensys/gnark-crypto/ecc/bls12-381/bandersnatch/eddsa" eddsa_bls12381 "github.com/consensys/gnark-crypto/ecc/bls12-381/twistededwards/eddsa" eddsa_bls24315 "github.com/consensys/gnark-crypto/ecc/bls24-315/twistededwards/eddsa" @@ -41,6 +42,8 @@ func New(ss twistededwards.ID, r io.Reader) (signature.Signer, error) { return eddsa_bls12381_bandersnatch.GenerateKey(r) case twistededwards.BLS12_377: return eddsa_bls12377.GenerateKey(r) + case twistededwards.BLS12_378: + return eddsa_bls12378.GenerateKey(r) case twistededwards.BW6_761: return eddsa_bw6761.GenerateKey(r) case twistededwards.BLS24_315: From 406487649ed769b71424a4eb188bf7006a3c4c5a Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 18 Mar 2022 10:26:50 +0100 Subject: [PATCH 28/29] fix: templating twistededwards for BLS12-378 after PR#160 --- ecc/bls12-378/twistededwards/curve.go | 67 +++++++++++++++++++ ecc/bls12-378/twistededwards/eddsa/eddsa.go | 30 +++------ .../twistededwards/eddsa/eddsa_test.go | 25 ++++--- ecc/bls12-378/twistededwards/point.go | 29 ++++---- .../{twistededwards_test.go => point_test.go} | 25 +++---- .../twistededwards/twistededwards.go | 64 ------------------ ecc/twistededwards/twistededwards.go | 1 + internal/generator/config/bls12-378.go | 15 ++++- .../generator/edwards/template/curve.go.tmpl | 5 +- 9 files changed, 132 insertions(+), 129 deletions(-) create mode 100644 ecc/bls12-378/twistededwards/curve.go rename ecc/bls12-378/twistededwards/{twistededwards_test.go => 
point_test.go} (97%) delete mode 100644 ecc/bls12-378/twistededwards/twistededwards.go diff --git a/ecc/bls12-378/twistededwards/curve.go b/ecc/bls12-378/twistededwards/curve.go new file mode 100644 index 000000000..260f7f74b --- /dev/null +++ b/ecc/bls12-378/twistededwards/curve.go @@ -0,0 +1,67 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "math/big" + "sync" + + "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" +) + +// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 +type CurveParams struct { + A, D fr.Element + Cofactor fr.Element + Order big.Int + Base PointAffine +} + +// GetEdwardsCurve returns the twisted Edwards curve on bls12-378/Fr +func GetEdwardsCurve() CurveParams { + initOnce.Do(initCurveParams) + // copy to keep Order private + var res CurveParams + + res.A.Set(&curveParams.A) + res.D.Set(&curveParams.D) + res.Cofactor.Set(&curveParams.Cofactor) + res.Order.Set(&curveParams.Order) + res.Base.Set(&curveParams.Base) + + return res +} + +var ( + initOnce sync.Once + curveParams CurveParams +) + +func initCurveParams() { + curveParams.A.SetString("16249") + curveParams.D.SetString("826857503717340716663906603396009292766308904506333520048618402505612607353") + curveParams.Cofactor.SetString("8") + 
curveParams.Order.SetString("1860429383364016612493789857641020908721690454530426945748883177201355593303", 10) + + curveParams.Base.X.SetString("6772953896463446981848394912418300623023000177913479948380771331313783560843") + curveParams.Base.Y.SetString("9922290044608088599966879240752111513195706854076002240583420830067351093249") +} + +// mulByA multiplies fr.Element by curveParams.A +func mulByA(x *fr.Element) { + x.Mul(x, &curveParams.A) +} diff --git a/ecc/bls12-378/twistededwards/eddsa/eddsa.go b/ecc/bls12-378/twistededwards/eddsa/eddsa.go index 00f78b442..2fa159bad 100644 --- a/ecc/bls12-378/twistededwards/eddsa/eddsa.go +++ b/ecc/bls12-378/twistededwards/eddsa/eddsa.go @@ -58,12 +58,8 @@ type Signature struct { S [sizeFr]byte } -func init() { - signature.Register(signature.EDDSA_BLS12_378, GenerateKeyInterfaces) -} - // GenerateKey generates a public and private key pair. -func GenerateKey(r io.Reader) (PrivateKey, error) { +func GenerateKey(r io.Reader) (*PrivateKey, error) { c := twistededwards.GetEdwardsCurve() @@ -74,7 +70,7 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { seed := make([]byte, 32) _, err := r.Read(seed) if err != nil { - return priv, err + return nil, err } h := blake2b.Sum512(seed[:]) for i := 0; i < 32; i++ { @@ -104,25 +100,21 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { priv.PublicKey = pub - return priv, nil -} - -// GenerateKeyInterfaces generate interfaces for the public/private key. -// This purpose of this function is to be registered in the list of signature schemes. 
-func GenerateKeyInterfaces(r io.Reader) (signature.Signer, error) { - priv, err := GenerateKey(r) - return &priv, err + return &priv, nil } // Equal compares 2 public keys -func (pub *PublicKey) Equal(other signature.PublicKey) bool { +func (pub *PublicKey) Equal(x signature.PublicKey) bool { + xx, ok := x.(*PublicKey) + if !ok { + return false + } bpk := pub.Bytes() - bother := other.Bytes() - return subtle.ConstantTimeCompare(bpk, bother) == 1 + bxx := xx.Bytes() + return subtle.ConstantTimeCompare(bpk, bxx) == 1 } // Public returns the public key associated to the private key. -// From Signer interface defined in gnark/crypto/signature. func (privKey *PrivateKey) Public() signature.PublicKey { var pub PublicKey pub.A.Set(&privKey.PublicKey.A) @@ -238,7 +230,7 @@ func (pub *PublicKey) Verify(sigBin, message []byte, hFunc hash.Hash) (bool, err // lhs = cofactor*S*Base var lhs twistededwards.PointAffine var bCofactor, bs big.Int - curveParams.Cofactor.ToBigInt(&bCofactor) + curveParams.Cofactor.ToBigIntRegular(&bCofactor) bs.SetBytes(sig.S[:]) lhs.ScalarMul(&curveParams.Base, &bs). 
ScalarMul(&lhs, &bCofactor) diff --git a/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go b/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go index 44864e266..5cfc0927f 100644 --- a/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go +++ b/ecc/bls12-378/twistededwards/eddsa/eddsa_test.go @@ -27,7 +27,6 @@ import ( "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" "github.com/consensys/gnark-crypto/hash" - "github.com/consensys/gnark-crypto/signature" ) func Example() { @@ -35,8 +34,8 @@ func Example() { hFunc := hash.MIMC_BLS12_378.New() // create a eddsa key pair - privateKey, _ := signature.EDDSA_BLS12_378.New(crand.Reader) - publicKey := privateKey.Public() + privateKey, _ := GenerateKey(crand.Reader) + publicKey := privateKey.PublicKey // note that the message is on 4 bytes msg := []byte{0xde, 0xad, 0xf0, 0x0d} @@ -60,17 +59,17 @@ func TestSerialization(t *testing.T) { src := rand.NewSource(0) r := rand.New(src) - privKey1, err := signature.EDDSA_BLS12_378.New(r) + privKey1, err := GenerateKey(r) if err != nil { t.Fatal(err) } - pubKey1 := privKey1.Public() + pubKey1 := privKey1.PublicKey - privKey2, err := signature.EDDSA_BLS12_378.New(r) + privKey2, err := GenerateKey(r) if err != nil { t.Fatal(err) } - pubKey2 := privKey2.Public() + pubKey2 := privKey2.PublicKey pubKeyBin1 := pubKey1.Bytes() pubKey2.SetBytes(pubKeyBin1) @@ -103,11 +102,11 @@ func TestEddsaMIMC(t *testing.T) { r := rand.New(src) // create eddsa obj and sign a message - privKey, err := signature.EDDSA_BLS12_378.New(r) + privKey, err := GenerateKey(r) if err != nil { t.Fatal(nil) } - pubKey := privKey.Public() + pubKey := privKey.PublicKey hFunc := hash.MIMC_BLS12_378.New() var frMsg fr.Element @@ -150,8 +149,8 @@ func TestEddsaSHA256(t *testing.T) { // create eddsa obj and sign a message // create eddsa obj and sign a message - privKey, err := signature.EDDSA_BLS12_378.New(r) - pubKey := privKey.Public() + privKey, err := GenerateKey(r) + pubKey := privKey.PublicKey if err != nil { t.Fatal(err) 
} @@ -191,8 +190,8 @@ func BenchmarkVerify(b *testing.B) { hFunc := hash.MIMC_BLS12_378.New() // create eddsa obj and sign a message - privKey, err := signature.EDDSA_BLS12_378.New(r) - pubKey := privKey.Public() + privKey, err := GenerateKey(r) + pubKey := privKey.PublicKey if err != nil { b.Fatal(err) } diff --git a/ecc/bls12-378/twistededwards/point.go b/ecc/bls12-378/twistededwards/point.go index e49461298..a3f8e5a87 100644 --- a/ecc/bls12-378/twistededwards/point.go +++ b/ecc/bls12-378/twistededwards/point.go @@ -84,12 +84,14 @@ func (p *PointAffine) Marshal() []byte { } func computeX(y *fr.Element) (x fr.Element) { + initOnce.Do(initCurveParams) + var one, num, den fr.Element one.SetOne() num.Square(y) - den.Mul(&num, &edwards.D) + den.Mul(&num, &curveParams.D) num.Sub(&one, &num) - den.Sub(&edwards.A, &den) + den.Sub(&curveParams.A, &den) x.Div(&num, &den) x.Sqrt(&x) return @@ -403,7 +405,6 @@ func (p *PointProj) Add(p1, p2 *PointProj) *PointProj { // ScalarMul scalar multiplication of a point // p1 in projective coordinates with a scalar in big.Int func (p *PointProj) ScalarMul(p1 *PointProj, scalar *big.Int) *PointProj { - var _scalar big.Int _scalar.Set(scalar) p.Set(p1) @@ -478,10 +479,8 @@ func (p *PointExtended) FromAffine(p1 *PointAffine) *PointExtended { } // Add adds points in extended coordinates -// dedicated addition -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 +// See https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { - if p1.Equal(p2) { p.Double(p1) return p @@ -498,8 +497,9 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { Mul(&F, &tmp). Add(&F, &B). 
Sub(&F, &A) - mulByA(&A) - G.Add(&A, &B) + G.Set(&A) + mulByA(&G) + G.Add(&G, &B) H.Sub(&D, &C) p.X.Mul(&E, &F) @@ -511,9 +511,8 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } // MixedAdd adds a point in extended coordinates to a point in affine coordinates -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 +// See https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExtended { - var A, B, C, D, E, F, G, H, tmp fr.Element A.Mul(&p2.X, &p1.Z) @@ -535,14 +534,15 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten Mul(&F, &tmp). Add(&F, &B). Sub(&F, &A) - mulByA(&A) - G.Add(&A, &B) + G.Set(&A) + mulByA(&G) + G.Add(&G, &B) H.Sub(&D, &C) - p.X.Mul(&F, &E) + p.X.Mul(&E, &F) p.Y.Mul(&G, &H) p.T.Mul(&E, &H) - p.Z.Mul(&G, &F) + p.Z.Mul(&F, &G) return p } @@ -618,7 +618,6 @@ func (p *PointExtended) setInfinity() *PointExtended { // ScalarMul scalar multiplication of a point // p1 in extended coordinates with a scalar in big.Int func (p *PointExtended) ScalarMul(p1 *PointExtended, scalar *big.Int) *PointExtended { - var _scalar big.Int _scalar.Set(scalar) p.Set(p1) diff --git a/ecc/bls12-378/twistededwards/twistededwards_test.go b/ecc/bls12-378/twistededwards/point_test.go similarity index 97% rename from ecc/bls12-378/twistededwards/twistededwards_test.go rename to ecc/bls12-378/twistededwards/point_test.go index cafca3344..46f0cddd0 100644 --- a/ecc/bls12-378/twistededwards/twistededwards_test.go +++ b/ecc/bls12-378/twistededwards/point_test.go @@ -605,16 +605,17 @@ func TestOps(t *testing.T) { } func TestMarshal(t *testing.T) { + initOnce.Do(initCurveParams) var point, unmarshalPoint PointAffine - point.Set(&edwards.Base) + point.Set(&curveParams.Base) for i := 0; i < 20; i++ { b := point.Marshal() unmarshalPoint.Unmarshal(b) if !point.Equal(&unmarshalPoint) { 
t.Fatal("error unmarshal(marshal(point))") } - point.Add(&point, &edwards.Base) + point.Add(&point, &curveParams.Base) } } @@ -647,12 +648,10 @@ func BenchmarkScalarMulExtended(b *testing.B) { var doubleAndAdd PointExtended - b.Run("double and add", func(b *testing.B) { - b.ResetTimer() - for j := 0; j < b.N; j++ { - doubleAndAdd.ScalarMul(&a, &s) - } - }) + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } } func BenchmarkScalarMulProjective(b *testing.B) { @@ -665,10 +664,8 @@ func BenchmarkScalarMulProjective(b *testing.B) { var doubleAndAdd PointProj - b.Run("double and add", func(b *testing.B) { - b.ResetTimer() - for j := 0; j < b.N; j++ { - doubleAndAdd.ScalarMul(&a, &s) - } - }) + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } } diff --git a/ecc/bls12-378/twistededwards/twistededwards.go b/ecc/bls12-378/twistededwards/twistededwards.go deleted file mode 100644 index 676838f24..000000000 --- a/ecc/bls12-378/twistededwards/twistededwards.go +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright © 2020 ConsenSys - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package twistededwards - -import ( - "math/big" - - "github.com/consensys/gnark-crypto/ecc/bls12-378/fr" -) - -// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 -type CurveParams struct { - A, D fr.Element // in Montgomery form - Cofactor fr.Element // not in Montgomery form - Order big.Int - Base PointAffine -} - -var edwards CurveParams - -// GetEdwardsCurve returns the twisted Edwards curve on BLS12-378's Fr -func GetEdwardsCurve() CurveParams { - - // copy to keep Order private - var res CurveParams - - res.A.Set(&edwards.A) - res.D.Set(&edwards.D) - res.Cofactor.Set(&edwards.Cofactor) - res.Order.Set(&edwards.Order) - res.Base.Set(&edwards.Base) - - return res -} - -func init() { - - edwards.A.SetString("16249") - edwards.D.SetString("826857503717340716663906603396009292766308904506333520048618402505612607353") - edwards.Cofactor.SetUint64(8).FromMont() - edwards.Order.SetString("1860429383364016612493789857641020908721690454530426945748883177201355593303", 10) - - edwards.Base.X.SetString("6772953896463446981848394912418300623023000177913479948380771331313783560843") - edwards.Base.Y.SetString("9922290044608088599966879240752111513195706854076002240583420830067351093249") -} - -// mulByA multiplies fr.Element by edwards.A -func mulByA(x *fr.Element) { - x.Mul(x, &edwards.A) -} diff --git a/ecc/twistededwards/twistededwards.go b/ecc/twistededwards/twistededwards.go index 416e1d637..ea8c4de36 100644 --- a/ecc/twistededwards/twistededwards.go +++ b/ecc/twistededwards/twistededwards.go @@ -8,6 +8,7 @@ const ( UNKNOWN ID = iota BN254 BLS12_377 + BLS12_378 BLS12_381 BLS12_381_BANDERSNATCH BLS24_315 diff --git a/internal/generator/config/bls12-378.go b/internal/generator/config/bls12-378.go index 3abc1c453..e7a62231d 100644 --- a/internal/generator/config/bls12-378.go +++ b/internal/generator/config/bls12-378.go @@ -23,6 +23,7 @@ var BLS12_378 = Curve{ CRange: defaultCRange(), Projective: true, }, + // 2-isogeny HashE1: &HashSuite{ A: 
[]string{"0x3eeb0416684d18f2c41f0ac56b4172c97877b1f2170ca6f42387dd67a2cc5c175e179b1a06ffff79e0723fffffffff2"}, B: []string{"0x16"}, @@ -55,7 +56,19 @@ var BLS12_378 = Curve{ }, } +var tBLS12_378 = TwistedEdwardsCurve{ + Name: BLS12_378.Name, + Package: "twistededwards", + EnumID: BLS12_378.EnumID, + A: "16249", + D: "826857503717340716663906603396009292766308904506333520048618402505612607353", + Cofactor: "8", + Order: "1860429383364016612493789857641020908721690454530426945748883177201355593303", + BaseX: "6772953896463446981848394912418300623023000177913479948380771331313783560843", + BaseY: "9922290044608088599966879240752111513195706854076002240583420830067351093249", +} + func init() { addCurve(&BLS12_378) - + addTwistedEdwardCurve(&tBLS12_378) } diff --git a/internal/generator/edwards/template/curve.go.tmpl b/internal/generator/edwards/template/curve.go.tmpl index 69444b344..6d47f8fad 100644 --- a/internal/generator/edwards/template/curve.go.tmpl +++ b/internal/generator/edwards/template/curve.go.tmpl @@ -19,7 +19,7 @@ type CurveParams struct { {{- if .HasEndomorphism}} // endomorphism - endo [2]fr.Element + endo [2]fr.Element lambda big.Int glvBasis ecc.Lattice {{- end}} @@ -79,7 +79,6 @@ func mulByA(x *fr.Element) { x.Neg(x) fr.MulBy5(x) {{- else }} - NOT IMPLEMENTED - panic("not implementeed") + x.Mul(x, &curveParams.A) {{- end}} } From fa8c1b74c41b238c6f7373909803c9c763963a41 Mon Sep 17 00:00:00 2001 From: Youssef El Housni Date: Fri, 18 Mar 2022 11:18:21 +0100 Subject: [PATCH 29/29] fix: templating twistededwards for BW6-756 after PR#160 --- ecc/bw6-756/twistededwards/curve.go | 67 +++++++++++++++++++ ecc/bw6-756/twistededwards/eddsa/eddsa.go | 30 +++------ .../twistededwards/eddsa/eddsa_test.go | 25 ++++--- ecc/bw6-756/twistededwards/point.go | 29 ++++---- .../{twistededwards_test.go => point_test.go} | 25 +++---- ecc/bw6-756/twistededwards/twistededwards.go | 63 ----------------- ecc/twistededwards/twistededwards.go | 1 + 
internal/generator/config/bw6-756.go | 13 ++++ 8 files changed, 129 insertions(+), 124 deletions(-) create mode 100644 ecc/bw6-756/twistededwards/curve.go rename ecc/bw6-756/twistededwards/{twistededwards_test.go => point_test.go} (97%) delete mode 100644 ecc/bw6-756/twistededwards/twistededwards.go diff --git a/ecc/bw6-756/twistededwards/curve.go b/ecc/bw6-756/twistededwards/curve.go new file mode 100644 index 000000000..9c95c4f21 --- /dev/null +++ b/ecc/bw6-756/twistededwards/curve.go @@ -0,0 +1,67 @@ +// Copyright 2020 ConsenSys Software Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Code generated by consensys/gnark-crypto DO NOT EDIT + +package twistededwards + +import ( + "math/big" + "sync" + + "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" +) + +// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 +type CurveParams struct { + A, D fr.Element + Cofactor fr.Element + Order big.Int + Base PointAffine +} + +// GetEdwardsCurve returns the twisted Edwards curve on bw6-756/Fr +func GetEdwardsCurve() CurveParams { + initOnce.Do(initCurveParams) + // copy to keep Order private + var res CurveParams + + res.A.Set(&curveParams.A) + res.D.Set(&curveParams.D) + res.Cofactor.Set(&curveParams.Cofactor) + res.Order.Set(&curveParams.Order) + res.Base.Set(&curveParams.Base) + + return res +} + +var ( + initOnce sync.Once + curveParams CurveParams +) + +func initCurveParams() { + curveParams.A.SetString("35895") + curveParams.D.SetString("35894") + curveParams.Cofactor.SetString("8") + curveParams.Order.SetString("75656025759413271466656060197725120092480961471365614219134998880569790930794516726065877484428941069706901665493", 10) + + curveParams.Base.X.SetString("357240753431396842603421262238241571158569743053156052278371293545344505472364896271378029423975465332156840775830") + curveParams.Base.Y.SetString("279345325880910540799960837653138904956852780817349960193932651092957355032339063742900216468694143617372745972501") +} + +// mulByA multiplies fr.Element by curveParams.A +func mulByA(x *fr.Element) { + x.Mul(x, &curveParams.A) +} diff --git a/ecc/bw6-756/twistededwards/eddsa/eddsa.go b/ecc/bw6-756/twistededwards/eddsa/eddsa.go index f5ca9d161..7f0e42a31 100644 --- a/ecc/bw6-756/twistededwards/eddsa/eddsa.go +++ b/ecc/bw6-756/twistededwards/eddsa/eddsa.go @@ -58,12 +58,8 @@ type Signature struct { S [sizeFr]byte } -func init() { - signature.Register(signature.EDDSA_BW6_756, GenerateKeyInterfaces) -} - // GenerateKey generates a public and private key pair. 
-func GenerateKey(r io.Reader) (PrivateKey, error) { +func GenerateKey(r io.Reader) (*PrivateKey, error) { c := twistededwards.GetEdwardsCurve() @@ -80,7 +76,7 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { seed := make([]byte, 32) _, err := r.Read(seed) if err != nil { - return priv, err + return nil, err } h1 := blake2b.Sum512(seed[:]) @@ -113,25 +109,21 @@ func GenerateKey(r io.Reader) (PrivateKey, error) { priv.PublicKey = pub - return priv, nil -} - -// GenerateKeyInterfaces generate interfaces for the public/private key. -// This purpose of this function is to be registered in the list of signature schemes. -func GenerateKeyInterfaces(r io.Reader) (signature.Signer, error) { - priv, err := GenerateKey(r) - return &priv, err + return &priv, nil } // Equal compares 2 public keys -func (pub *PublicKey) Equal(other signature.PublicKey) bool { +func (pub *PublicKey) Equal(x signature.PublicKey) bool { + xx, ok := x.(*PublicKey) + if !ok { + return false + } bpk := pub.Bytes() - bother := other.Bytes() - return subtle.ConstantTimeCompare(bpk, bother) == 1 + bxx := xx.Bytes() + return subtle.ConstantTimeCompare(bpk, bxx) == 1 } // Public returns the public key associated to the private key. -// From Signer interface defined in gnark/crypto/signature. func (privKey *PrivateKey) Public() signature.PublicKey { var pub PublicKey pub.A.Set(&privKey.PublicKey.A) @@ -247,7 +239,7 @@ func (pub *PublicKey) Verify(sigBin, message []byte, hFunc hash.Hash) (bool, err // lhs = cofactor*S*Base var lhs twistededwards.PointAffine var bCofactor, bs big.Int - curveParams.Cofactor.ToBigInt(&bCofactor) + curveParams.Cofactor.ToBigIntRegular(&bCofactor) bs.SetBytes(sig.S[:]) lhs.ScalarMul(&curveParams.Base, &bs). 
ScalarMul(&lhs, &bCofactor) diff --git a/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go b/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go index 20be1041a..8cc3ed1f4 100644 --- a/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go +++ b/ecc/bw6-756/twistededwards/eddsa/eddsa_test.go @@ -27,7 +27,6 @@ import ( "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" "github.com/consensys/gnark-crypto/hash" - "github.com/consensys/gnark-crypto/signature" ) func Example() { @@ -35,8 +34,8 @@ func Example() { hFunc := hash.MIMC_BW6_756.New() // create a eddsa key pair - privateKey, _ := signature.EDDSA_BW6_756.New(crand.Reader) - publicKey := privateKey.Public() + privateKey, _ := GenerateKey(crand.Reader) + publicKey := privateKey.PublicKey // note that the message is on 4 bytes msg := []byte{0xde, 0xad, 0xf0, 0x0d} @@ -60,17 +59,17 @@ func TestSerialization(t *testing.T) { src := rand.NewSource(0) r := rand.New(src) - privKey1, err := signature.EDDSA_BW6_756.New(r) + privKey1, err := GenerateKey(r) if err != nil { t.Fatal(err) } - pubKey1 := privKey1.Public() + pubKey1 := privKey1.PublicKey - privKey2, err := signature.EDDSA_BW6_756.New(r) + privKey2, err := GenerateKey(r) if err != nil { t.Fatal(err) } - pubKey2 := privKey2.Public() + pubKey2 := privKey2.PublicKey pubKeyBin1 := pubKey1.Bytes() pubKey2.SetBytes(pubKeyBin1) @@ -103,11 +102,11 @@ func TestEddsaMIMC(t *testing.T) { r := rand.New(src) // create eddsa obj and sign a message - privKey, err := signature.EDDSA_BW6_756.New(r) + privKey, err := GenerateKey(r) if err != nil { t.Fatal(nil) } - pubKey := privKey.Public() + pubKey := privKey.PublicKey hFunc := hash.MIMC_BW6_756.New() var frMsg fr.Element @@ -150,8 +149,8 @@ func TestEddsaSHA256(t *testing.T) { // create eddsa obj and sign a message // create eddsa obj and sign a message - privKey, err := signature.EDDSA_BW6_756.New(r) - pubKey := privKey.Public() + privKey, err := GenerateKey(r) + pubKey := privKey.PublicKey if err != nil { t.Fatal(err) } @@ -191,8 +190,8 @@ 
func BenchmarkVerify(b *testing.B) { hFunc := hash.MIMC_BW6_756.New() // create eddsa obj and sign a message - privKey, err := signature.EDDSA_BW6_756.New(r) - pubKey := privKey.Public() + privKey, err := GenerateKey(r) + pubKey := privKey.PublicKey if err != nil { b.Fatal(err) } diff --git a/ecc/bw6-756/twistededwards/point.go b/ecc/bw6-756/twistededwards/point.go index fadc596dd..8fd649c95 100644 --- a/ecc/bw6-756/twistededwards/point.go +++ b/ecc/bw6-756/twistededwards/point.go @@ -84,12 +84,14 @@ func (p *PointAffine) Marshal() []byte { } func computeX(y *fr.Element) (x fr.Element) { + initOnce.Do(initCurveParams) + var one, num, den fr.Element one.SetOne() num.Square(y) - den.Mul(&num, &edwards.D) + den.Mul(&num, &curveParams.D) num.Sub(&one, &num) - den.Sub(&edwards.A, &den) + den.Sub(&curveParams.A, &den) x.Div(&num, &den) x.Sqrt(&x) return @@ -403,7 +405,6 @@ func (p *PointProj) Add(p1, p2 *PointProj) *PointProj { // ScalarMul scalar multiplication of a point // p1 in projective coordinates with a scalar in big.Int func (p *PointProj) ScalarMul(p1 *PointProj, scalar *big.Int) *PointProj { - var _scalar big.Int _scalar.Set(scalar) p.Set(p1) @@ -478,10 +479,8 @@ func (p *PointExtended) FromAffine(p1 *PointAffine) *PointExtended { } // Add adds points in extended coordinates -// dedicated addition -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 +// See https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-add-2008-hwcd-2 func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { - if p1.Equal(p2) { p.Double(p1) return p @@ -498,8 +497,9 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { Mul(&F, &tmp). Add(&F, &B). 
Sub(&F, &A) - mulByA(&A) - G.Add(&A, &B) + G.Set(&A) + mulByA(&G) + G.Add(&G, &B) H.Sub(&D, &C) p.X.Mul(&E, &F) @@ -511,9 +511,8 @@ func (p *PointExtended) Add(p1, p2 *PointExtended) *PointExtended { } // MixedAdd adds a point in extended coordinates to a point in affine coordinates -// https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 +// See https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html#addition-madd-2008-hwcd-2 func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExtended { - var A, B, C, D, E, F, G, H, tmp fr.Element A.Mul(&p2.X, &p1.Z) @@ -535,14 +534,15 @@ func (p *PointExtended) MixedAdd(p1 *PointExtended, p2 *PointAffine) *PointExten Mul(&F, &tmp). Add(&F, &B). Sub(&F, &A) - mulByA(&A) - G.Add(&A, &B) + G.Set(&A) + mulByA(&G) + G.Add(&G, &B) H.Sub(&D, &C) - p.X.Mul(&F, &E) + p.X.Mul(&E, &F) p.Y.Mul(&G, &H) p.T.Mul(&E, &H) - p.Z.Mul(&G, &F) + p.Z.Mul(&F, &G) return p } @@ -618,7 +618,6 @@ func (p *PointExtended) setInfinity() *PointExtended { // ScalarMul scalar multiplication of a point // p1 in extended coordinates with a scalar in big.Int func (p *PointExtended) ScalarMul(p1 *PointExtended, scalar *big.Int) *PointExtended { - var _scalar big.Int _scalar.Set(scalar) p.Set(p1) diff --git a/ecc/bw6-756/twistededwards/twistededwards_test.go b/ecc/bw6-756/twistededwards/point_test.go similarity index 97% rename from ecc/bw6-756/twistededwards/twistededwards_test.go rename to ecc/bw6-756/twistededwards/point_test.go index e8fad9e1f..3410420c9 100644 --- a/ecc/bw6-756/twistededwards/twistededwards_test.go +++ b/ecc/bw6-756/twistededwards/point_test.go @@ -605,16 +605,17 @@ func TestOps(t *testing.T) { } func TestMarshal(t *testing.T) { + initOnce.Do(initCurveParams) var point, unmarshalPoint PointAffine - point.Set(&edwards.Base) + point.Set(&curveParams.Base) for i := 0; i < 20; i++ { b := point.Marshal() unmarshalPoint.Unmarshal(b) if !point.Equal(&unmarshalPoint) { t.Fatal("error 
unmarshal(marshal(point))") } - point.Add(&point, &edwards.Base) + point.Add(&point, &curveParams.Base) } } @@ -647,12 +648,10 @@ func BenchmarkScalarMulExtended(b *testing.B) { var doubleAndAdd PointExtended - b.Run("double and add", func(b *testing.B) { - b.ResetTimer() - for j := 0; j < b.N; j++ { - doubleAndAdd.ScalarMul(&a, &s) - } - }) + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } } func BenchmarkScalarMulProjective(b *testing.B) { @@ -665,10 +664,8 @@ func BenchmarkScalarMulProjective(b *testing.B) { var doubleAndAdd PointProj - b.Run("double and add", func(b *testing.B) { - b.ResetTimer() - for j := 0; j < b.N; j++ { - doubleAndAdd.ScalarMul(&a, &s) - } - }) + b.ResetTimer() + for j := 0; j < b.N; j++ { + doubleAndAdd.ScalarMul(&a, &s) + } } diff --git a/ecc/bw6-756/twistededwards/twistededwards.go b/ecc/bw6-756/twistededwards/twistededwards.go deleted file mode 100644 index c24f0261a..000000000 --- a/ecc/bw6-756/twistededwards/twistededwards.go +++ /dev/null @@ -1,63 +0,0 @@ -/* -Copyright © 2020 ConsenSys - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package twistededwards - -import ( - "math/big" - - "github.com/consensys/gnark-crypto/ecc/bw6-756/fr" -) - -// CurveParams curve parameters: ax^2 + y^2 = 1 + d*x^2*y^2 -type CurveParams struct { - A, D fr.Element // in Montgomery form - Cofactor fr.Element // not in Montgomery form - Order big.Int - Base PointAffine -} - -var edwards CurveParams - -// GetEdwardsCurve returns the twisted Edwards curve on BW6-756's Fr -func GetEdwardsCurve() CurveParams { - // copy to keep Order private - var res CurveParams - - res.A.Set(&edwards.A) - res.D.Set(&edwards.D) - res.Cofactor.Set(&edwards.Cofactor) - res.Order.Set(&edwards.Order) - res.Base.Set(&edwards.Base) - - return res -} - -func init() { - - edwards.A.SetUint64(35895) - edwards.D.SetUint64(35894) - edwards.Cofactor.SetUint64(8).FromMont() - edwards.Order.SetString("75656025759413271466656060197725120092480961471365614219134998880569790930794516726065877484428941069706901665493", 10) - - edwards.Base.X.SetString("357240753431396842603421262238241571158569743053156052278371293545344505472364896271378029423975465332156840775830") - edwards.Base.Y.SetString("279345325880910540799960837653138904956852780817349960193932651092957355032339063742900216468694143617372745972501") -} - -// mulByA multiplies fr.Element by edwards.A -func mulByA(x *fr.Element) { - x.Mul(x, &edwards.A) -} diff --git a/ecc/twistededwards/twistededwards.go b/ecc/twistededwards/twistededwards.go index ea8c4de36..613164950 100644 --- a/ecc/twistededwards/twistededwards.go +++ b/ecc/twistededwards/twistededwards.go @@ -13,5 +13,6 @@ const ( BLS12_381_BANDERSNATCH BLS24_315 BW6_761 + BW6_756 BW6_633 ) diff --git a/internal/generator/config/bw6-756.go b/internal/generator/config/bw6-756.go index dcbb1f24b..394176595 100644 --- a/internal/generator/config/bw6-756.go +++ b/internal/generator/config/bw6-756.go @@ -159,6 +159,19 @@ var BW6_756 = Curve{ }, } +var tBW6_756 = TwistedEdwardsCurve{ + Name: BW6_756.Name, + Package: "twistededwards", + 
EnumID: BW6_756.EnumID, + A: "35895", + D: "35894", + Cofactor: "8", + Order: "75656025759413271466656060197725120092480961471365614219134998880569790930794516726065877484428941069706901665493", + BaseX: "357240753431396842603421262238241571158569743053156052278371293545344505472364896271378029423975465332156840775830", + BaseY: "279345325880910540799960837653138904956852780817349960193932651092957355032339063742900216468694143617372745972501", +} + func init() { addCurve(&BW6_756) + addTwistedEdwardCurve(&tBW6_756) }