diff --git a/src/pqm/pqm.py b/src/pqm/pqm.py
index 8ef12bd..c016a47 100644
--- a/src/pqm/pqm.py
+++ b/src/pqm/pqm.py
@@ -1,4 +1,5 @@
 from typing import Optional
+import warnings
 
 import numpy as np
 from scipy.stats import chi2_contingency, chi2
@@ -39,8 +40,18 @@ def _pqm_test(
     gauss_frac : float
         Fraction of samples to take from gaussian distribution with mean/std
         determined from the other reference samples. This ensures full support
-        of the reference samples if pathological behavior is expected.
-        Default: 0.0 no gaussian samples.
+        of the reference samples if pathological behavior is expected. Default:
+        0.0 no gaussian samples.
+
+    Note
+    ----
+        When using ``x_frac`` and ``gauss_frac``, note that the number of
+        reference samples from the x_samples, y_samples, and gaussian
+        distribution will be determined by a multinomial distribution. This
+        means that the actual number of reference samples from each distribution
+        may not be exactly equal to the requested fractions, but will on average
+        equal those numbers. For best results, we suggest using a large number
+        of re-tessellations, though this is our recommendation in any case.
 
     Returns
     -------
@@ -65,11 +76,15 @@ def _pqm_test(
     if x_frac is None:
         x_frac = len(x_samples) / (len(x_samples) + len(y_samples))
 
+    # Determine number of samples from each distribution (x_samples, y_samples, gaussian)
+    Nx, Ny, Ng = np.random.multinomial(
+        num_refs,
+        [x_frac * (1.0 - gauss_frac), (1.0 - x_frac) * (1.0 - gauss_frac), gauss_frac],
+    )
+
     # Collect reference samples from x_samples
-    if x_frac > 0:
-        xrefs = np.random.choice(
-            len(x_samples), int(x_frac * (1.0 - gauss_frac) * num_refs), replace=False
-        )
+    if Nx > 0:
+        xrefs = np.random.choice(len(x_samples), Nx, replace=False)
         N = np.arange(len(x_samples))
         N[xrefs] = -1
         N = N[N >= 0]
@@ -78,10 +93,8 @@ def _pqm_test(
         xrefs = np.zeros((0,) + x_samples.shape[1:])
 
     # Collect reference samples from y_samples
-    if x_frac < 1:
-        yrefs = np.random.choice(
-            len(y_samples), int((1.0 - x_frac) * (1.0 - gauss_frac) * num_refs), replace=False
-        )
+    if Ny > 0:
+        yrefs = np.random.choice(len(y_samples), Ny, replace=False)
         N = np.arange(len(y_samples))
         N[yrefs] = -1
         N = N[N >= 0]
@@ -93,11 +106,19 @@ def _pqm_test(
     refs = np.concatenate([xrefs, yrefs], axis=0)
 
     # get gaussian reference points if requested
-    if gauss_frac > 0:
+    if Ng > 0:
+        if Nx + Ny > 2:
+            m, s = np.mean(refs, axis=0), np.std(refs, axis=0)
+        else:
+            warnings.warn(
+                f"Very low number of x/y reference samples used ({Nx+Ny}). Initializing gaussian from all y_samples. We suggest increasing num_refs or decreasing gauss_frac.",
+                UserWarning,
+            )
+            m, s = np.mean(y_samples, axis=0), np.std(y_samples, axis=0)
         gauss_refs = np.random.normal(
-            loc=np.mean(refs, axis=0),
-            scale=np.std(refs, axis=0),
-            size=(int(gauss_frac * num_refs), *refs.shape[1:]),
+            loc=m,
+            scale=s,
+            size=(Ng, *x_samples.shape[1:]),
         )
         refs = np.concatenate([refs, gauss_refs], axis=0)
 
@@ -128,7 +149,8 @@ def pqm_pvalue(
 ):
     """
     Perform the PQM test of the null hypothesis that `x_samples` and `y_samples`
-    are drawn form the same distribution.
+    are drawn from the same distribution. This version returns the pvalue under
+    the null hypothesis that both samples are drawn from the same distribution.
 
     Parameters
     ----------
@@ -158,8 +180,18 @@ def pqm_pvalue(
     gauss_frac : float
         Fraction of samples to take from gaussian distribution with mean/std
         determined from the other reference samples. This ensures full support
-        of the reference samples if pathological behavior is expected.
-        Default: 0.0 no gaussian samples
+        of the reference samples if pathological behavior is expected. Default:
+        0.0 no gaussian samples.
+
+    Note
+    ----
+        When using ``x_frac`` and ``gauss_frac``, note that the number of
+        reference samples from the x_samples, y_samples, and gaussian
+        distribution will be determined by a multinomial distribution. This
+        means that the actual number of reference samples from each distribution
+        may not be exactly equal to the requested fractions, but will on average
+        equal those numbers. For best results, we suggest using a large number
+        of re-tessellations, though this is our recommendation in any case.
 
     Returns
     -------
@@ -194,7 +226,8 @@ def pqm_chi2(
 ):
     """
     Perform the PQM test of the null hypothesis that `x_samples` and `y_samples`
-    are drawn form the same distribution.
+    are drawn from the same distribution. This version returns the chi^2
+    statistic with dof = num_refs-1.
 
     Parameters
     ----------
@@ -224,13 +257,34 @@ def pqm_chi2(
     gauss_frac : float
         Fraction of samples to take from gaussian distribution with mean/std
         determined from the other reference samples. This ensures full support
-        of the reference samples if pathological behavior is expected.
-        Default: 0.0 no gaussian samples
+        of the reference samples if pathological behavior is expected. Default:
+        0.0 no gaussian samples.
+
+    Note
+    ----
+        When using ``x_frac`` and ``gauss_frac``, note that the number of
+        reference samples from the x_samples, y_samples, and gaussian
+        distribution will be determined by a multinomial distribution. This
+        means that the actual number of reference samples from each distribution
+        may not be exactly equal to the requested fractions, but will on average
+        equal those numbers. For best results, we suggest using a large number
+        of re-tessellations, though this is our recommendation in any case.
+
+    Note
+    ----
+        Some voronoi bins may be empty after counting. Due to the nature of
+        ``scipy.stats.chi2_contingency`` we must first remove those voronoi
+        cells, meaning that the dof of the chi^2 would change. To mitigate this
+        effect, we rescale the chi^2 statistic to have the same cumulative
+        probability as the desired chi^2 statistic. Thus, the returned chi^2 is
+        always in reference to dof = num_refs-1. Thus users should not need to
+        worry about this, but it is worth noting, please contact us if you
+        notice unusual behavior.
 
     Returns
     -------
     float or list
-        chi2 statistic(s) and degree(s) of freedom.
+        chi2 statistic(s).
     """
     if re_tessellation is not None:
         return [