diff --git a/tools/ci/check_stability.py b/tools/ci/check_stability.py
index d781bae3f9c960..1c36153bff5ed8 100644
--- a/tools/ci/check_stability.py
+++ b/tools/ci/check_stability.py
@@ -20,7 +20,7 @@
 from tools import localpaths
 
 logger = None
-stability_run, write_inconsistent, write_results = None, None, None
+run_step, write_inconsistent, write_results = None, None, None
 wptrunner = None
 
 def setup_logging():
@@ -35,10 +35,9 @@ def setup_logging():
 
 
 def do_delayed_imports():
-    global stability_run, write_inconsistent, write_results, wptrunner
-    from tools.wpt.stability import run as stability_run
-    from tools.wpt.stability import write_inconsistent, write_results
+    global wptrunner, run_step, write_inconsistent, write_results  # fills in the module-level None placeholders
     from wptrunner import wptrunner
+    from wptrunner.stability import run_step, write_inconsistent, write_results
 
 
 class TravisFold(object):
@@ -266,11 +265,15 @@ def run(venv, wpt_args, **kwargs):
 
         do_delayed_imports()
 
-        wpt_kwargs["stability"] = True
         wpt_kwargs["prompt"] = False
         wpt_kwargs["install_browser"] = True
         wpt_kwargs["install"] = wpt_kwargs["product"].split(":")[0] == "firefox"
 
+        wpt_kwargs["pause_after_test"] = False
+        wpt_kwargs["verify_log_full"] = True
+        if wpt_kwargs["repeat"] == 1:
+            wpt_kwargs["repeat"] = 10
+
         wpt_kwargs = setup_wptrunner(venv, **wpt_kwargs)
 
         logger.info("Using binary %s" % wpt_kwargs["binary"])
@@ -279,9 +282,8 @@ def run(venv, wpt_args, **kwargs):
     with TravisFold("running_tests"):
         logger.info("Starting tests")
 
-
         wpt_logger = wptrunner.logger
-        iterations, results, inconsistent = stability_run(venv, wpt_logger, **wpt_kwargs)
+        results, inconsistent, iterations = run_step(wpt_logger, wpt_kwargs["repeat"], True, {}, **wpt_kwargs)
 
     if results:
         if inconsistent:
diff --git a/tools/wpt/run.py b/tools/wpt/run.py
index 2c8edbbe9a58c2..26e01722dfd4a5 100644
--- a/tools/wpt/run.py
+++ b/tools/wpt/run.py
@@ -49,8 +49,6 @@ def create_parser():
                         help="Browser to run tests in")
     parser.add_argument("--yes", "-y", dest="prompt", action="store_false", default=True,
                         help="Don't prompt before installing components")
-    parser.add_argument("--stability", action="store_true",
-                        help="Stability check tests")
     parser.add_argument("--install-browser", action="store_true",
                         help="Install the latest development version of the browser")
     parser._add_container_actions(wptcommandline.create_parser())
@@ -434,7 +432,6 @@ def setup_wptrunner(venv, prompt=True, install=False, **kwargs):
 def run(venv, **kwargs):
     #Remove arguments that aren't passed to wptrunner
     prompt = kwargs.pop("prompt", True)
-    stability = kwargs.pop("stability", True)
     install_browser = kwargs.pop("install_browser", False)
 
     kwargs = setup_wptrunner(venv,
@@ -442,20 +439,7 @@ def run(venv, **kwargs):
                              install=install_browser,
                              **kwargs)
 
-    if stability:
-        import stability
-        iterations, results, inconsistent = stability.run(venv, logger, **kwargs)
-
-        def log(x):
-            print(x)
-
-        if inconsistent:
-            stability.write_inconsistent(log, inconsistent, iterations)
-        else:
-            log("All tests stable")
-        rv = len(inconsistent) > 0
-    else:
-        rv = run_single(venv, **kwargs) > 0
+    rv = run_single(venv, **kwargs) > 0
 
     return rv
 
diff --git a/tools/wpt/stability.py b/tools/wpt/stability.py
deleted file mode 100644
index b3c85700ec385d..00000000000000
--- a/tools/wpt/stability.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import os
-import sys
-from collections import OrderedDict, defaultdict
-
-from mozlog import reader
-from mozlog.formatters import JSONFormatter, TbplFormatter
-from mozlog.handlers import BaseHandler, LogLevelFilter, StreamHandler
-
-from markdown import markdown_adjust, table
-from wptrunner import wptrunner
-
-
-class LogActionFilter(BaseHandler):
-
-    """Handler that filters out messages not of a given set of actions.
-
-    Subclasses BaseHandler.
-
-    :param inner: Handler to use for messages that pass this filter
-    :param actions: List of actions for which to fire the handler
-    """
-
-    def __init__(self, inner, actions):
-        """Extend BaseHandler and set inner and actions props on self."""
-        BaseHandler.__init__(self, inner)
-        self.inner = inner
-        self.actions = actions
-
-    def __call__(self, item):
-        """Invoke handler if action is in list passed as constructor param."""
-        if item["action"] in self.actions:
-            return self.inner(item)
-
-
-class LogHandler(reader.LogHandler):
-
-    """Handle updating test and subtest status in log.
-
-    Subclasses reader.LogHandler.
-    """
-    def __init__(self):
-        self.results = OrderedDict()
-
-    def find_or_create_test(self, data):
-        test_name = data["test"]
-        if self.results.get(test_name):
-            return self.results[test_name]
-
-        test = {
-            "subtests": OrderedDict(),
-            "status": defaultdict(int)
-        }
-        self.results[test_name] = test
-        return test
-
-    def find_or_create_subtest(self, data):
-        test = self.find_or_create_test(data)
-        subtest_name = data["subtest"]
-
-        if test["subtests"].get(subtest_name):
-            return test["subtests"][subtest_name]
-
-        subtest = {
-            "status": defaultdict(int),
-            "messages": set()
-        }
-        test["subtests"][subtest_name] = subtest
-
-        return subtest
-
-    def test_status(self, data):
-        subtest = self.find_or_create_subtest(data)
-        subtest["status"][data["status"]] += 1
-        if data.get("message"):
-            subtest["messages"].add(data["message"])
-
-    def test_end(self, data):
-        test = self.find_or_create_test(data)
-        test["status"][data["status"]] += 1
-
-
-def is_inconsistent(results_dict, iterations):
-    """Return whether or not a single test is inconsistent."""
-    return len(results_dict) > 1 or sum(results_dict.values()) != iterations
-
-
-def process_results(log, iterations):
-    """Process test log and return overall results and list of inconsistent tests."""
-    inconsistent = []
-    handler = LogHandler()
-    reader.handle_log(reader.read(log), handler)
-    results = handler.results
-    for test_name, test in results.iteritems():
-        if is_inconsistent(test["status"], iterations):
-            inconsistent.append((test_name, None, test["status"], []))
-        for subtest_name, subtest in test["subtests"].iteritems():
-            if is_inconsistent(subtest["status"], iterations):
-                inconsistent.append((test_name, subtest_name, subtest["status"], subtest["messages"]))
-    return results, inconsistent
-
-
-def err_string(results_dict, iterations):
-    """Create and return string with errors from test run."""
-    rv = []
-    total_results = sum(results_dict.values())
-    for key, value in sorted(results_dict.items()):
-        rv.append("%s%s" %
-                  (key, ": %s/%s" % (value, iterations) if value != iterations else ""))
-    if total_results < iterations:
-        rv.append("MISSING: %s/%s" % (iterations - total_results, iterations))
-    rv = ", ".join(rv)
-    if is_inconsistent(results_dict, iterations):
-        rv = "**%s**" % rv
-    return rv
-
-
-def write_inconsistent(log, inconsistent, iterations):
-    """Output inconsistent tests to logger.error."""
-    log("## Unstable results ##\n")
-    strings = [(
-        "`%s`" % markdown_adjust(test),
-        ("`%s`" % markdown_adjust(subtest)) if subtest else "",
-        err_string(results, iterations),
-        ("`%s`" % markdown_adjust(";".join(messages))) if len(messages) else "")
-        for test, subtest, results, messages in inconsistent]
-    table(["Test", "Subtest", "Results", "Messages"], strings, log)
-
-
-def write_results(log, results, iterations, pr_number=None, use_details=False):
-    log("## All results ##\n")
-    if use_details:
-        log("<details>\n")
-        log("<summary>%i %s ran</summary>\n\n" % (len(results),
-                                                  "tests" if len(results) > 1
-                                                  else "test"))
-
-    for test_name, test in results.iteritems():
-        baseurl = "http://w3c-test.org/submissions"
-        if "https" in os.path.splitext(test_name)[0].split(".")[1:]:
-            baseurl = "https://w3c-test.org/submissions"
-        title = test_name
-        if use_details:
-            log("<details>\n")
-            if pr_number:
-                title = "<a href=\"%s/%s%s\">%s</a>" % (baseurl, pr_number, test_name, title)
-            log('<summary>%s</summary>\n\n' % title)
-        else:
-            log("### %s ###" % title)
-        strings = [("", err_string(test["status"], iterations), "")]
-
-        strings.extend(((
-            ("`%s`" % markdown_adjust(subtest_name)) if subtest else "",
-            err_string(subtest["status"], iterations),
-            ("`%s`" % markdown_adjust(';'.join(subtest["messages"]))) if len(subtest["messages"]) else "")
-            for subtest_name, subtest in test["subtests"].items()))
-        table(["Subtest", "Results", "Messages"], strings, log)
-        if use_details:
-            log("</details>\n")
-
-    if use_details:
-        log("</details>\n")
-
-
-def run(venv, logger, **kwargs):
-    kwargs["pause_after_test"] = False
-    if kwargs["repeat"] == 1:
-        kwargs["repeat"] = 10
-
-    handler = LogActionFilter(
-        LogLevelFilter(
-            StreamHandler(
-                sys.stdout,
-                TbplFormatter()
-            ),
-            "WARNING"),
-        ["log", "process_output"])
-
-    # There is a public API for this in the next mozlog
-    initial_handlers = logger._state.handlers
-    logger._state.handlers = []
-
-    with open("raw.log", "wb") as log:
-        # Setup logging for wptrunner that keeps process output and
-        # warning+ level logs only
-        logger.add_handler(handler)
-        logger.add_handler(StreamHandler(log, JSONFormatter()))
-
-        wptrunner.run_tests(**kwargs)
-
-    logger._state.handlers = initial_handlers
-
-    with open("raw.log", "rb") as log:
-        results, inconsistent = process_results(log, kwargs["repeat"])
-
-    return kwargs["repeat"], results, inconsistent
diff --git a/tools/wptrunner/wptrunner/stability.py b/tools/wptrunner/wptrunner/stability.py
index e1709c5749a433..859f01444d34ac 100644
--- a/tools/wptrunner/wptrunner/stability.py
+++ b/tools/wptrunner/wptrunner/stability.py
@@ -181,8 +181,8 @@ def run_step(logger, iterations, restart_after_iteration, kwargs_extras, **kwarg
     kwargs.update(kwargs_extras)
 
     def wrap_handler(x):
-        x = LogLevelFilter(x, "WARNING")
         if not kwargs["verify_log_full"]:
+            x = LogLevelFilter(x, "WARNING")  # skipped when verify_log_full is set, so the handler is returned unwrapped
             x = LogActionFilter(x, ["log", "process_output"])
         return x
 
diff --git a/tools/wpt/tests/test_stability.py b/tools/wptrunner/wptrunner/tests/test_stability.py
similarity index 69%
rename from tools/wpt/tests/test_stability.py
rename to tools/wptrunner/wptrunner/tests/test_stability.py
index f2dd012087c6a1..72cff21016a8e5 100644
--- a/tools/wpt/tests/test_stability.py
+++ b/tools/wptrunner/wptrunner/tests/test_stability.py
@@ -1,4 +1,9 @@
-from tools.wpt import stability
+import sys
+from os.path import dirname, join
+
+sys.path.insert(0, join(dirname(__file__), "..", ".."))
+
+from wptrunner import stability
 
 def test_is_inconsistent():
     assert stability.is_inconsistent({"PASS": 10}, 10) is False
diff --git a/tools/wptrunner/wptrunner/wptcommandline.py b/tools/wptrunner/wptrunner/wptcommandline.py
index c4a01932de49fd..334df2f137036b 100644
--- a/tools/wptrunner/wptrunner/wptcommandline.py
+++ b/tools/wptrunner/wptrunner/wptcommandline.py
@@ -81,9 +81,13 @@ def create_parser(product_choices=None):
     mode_group.add_argument("--list-tests", action="store_true",
                             default=False,
                             help="List all tests that will run")
-    mode_group.add_argument("--verify", action="store_true",
-                            default=False,
-                            help="Run a stability check on the selected tests")
+    stability_group = mode_group.add_mutually_exclusive_group()
+    stability_group.add_argument("--verify", action="store_true",
+                                 default=False,
+                                 help="Run a stability check on the selected tests")
+    stability_group.add_argument("--stability", action="store_true",
+                                 default=False,
+                                 help=argparse.SUPPRESS)
     mode_group.add_argument("--verify-log-full", action="store_true",
                             default=False,
                             help="Output per-iteration test results when running verify")
diff --git a/tools/wptrunner/wptrunner/wptrunner.py b/tools/wptrunner/wptrunner/wptrunner.py
index 3ee4480a530d42..74f6555b28602c 100644
--- a/tools/wptrunner/wptrunner/wptrunner.py
+++ b/tools/wptrunner/wptrunner/wptrunner.py
@@ -300,6 +300,14 @@ def run_tests(config, test_paths, product, **kwargs):
 
 def check_stability(**kwargs):
     import stability
+    if kwargs["stability"]:
+        logger.warning("--stability is deprecated; please use --verify instead!")
+        kwargs['verify_max_time'] = None
+        kwargs['verify_chaos_mode'] = False
+        kwargs['verify_repeat_loop'] = 0
+        kwargs['verify_repeat_restart'] = 10 if kwargs['repeat'] == 1 else kwargs['repeat']
+        kwargs['verify_output_results'] = True
+
     return stability.check_stability(logger,
                                      max_time=kwargs['verify_max_time'],
                                      chaos_mode=kwargs['verify_chaos_mode'],
@@ -315,7 +323,7 @@ def start(**kwargs):
         list_disabled(**kwargs)
     elif kwargs["list_tests"]:
         list_tests(**kwargs)
-    elif kwargs["verify"]:
+    elif kwargs["verify"] or kwargs["stability"]:
         check_stability(**kwargs)
     else:
         return not run_tests(**kwargs)