From dc4f1798fe2fa7429e2551caba529cf0b022150d Mon Sep 17 00:00:00 2001
From: Iain Dunning <iaindunning@gmail.com>
Date: Thu, 10 Sep 2015 22:08:48 -0400
Subject: [PATCH 1/2] Add new Base.Test

---
 base/docs/helpdb.jl |  37 ---
 base/test.jl        | 644 +++++++++++++++++++++++++++++++++++++++-----
 test/test.jl        | 134 ++++-----
 3 files changed, 643 insertions(+), 172 deletions(-)

diff --git a/base/docs/helpdb.jl b/base/docs/helpdb.jl
index 48e030f43b403..6aa8599c3899b 100644
--- a/base/docs/helpdb.jl
+++ b/base/docs/helpdb.jl
@@ -520,43 +520,6 @@ Return `true` iff an array is heap-ordered according to the given order.
 """
 Collections.isheap
 
-# Base.Test
-
-doc"""
-    @test_throws(extype, ex)
-
-Test that the expression `ex` throws an exception of type `extype` and calls the current handler to handle the result.
-"""
-:(Test.@test_throws)
-
-doc"""
-    @test_approx_eq_eps(a, b, tol)
-
-Test two floating point numbers `a` and `b` for equality taking in account a margin of tolerance given by `tol`.
-"""
-:(Test.@test_approx_eq_eps)
-
-doc"""
-    @test(ex)
-
-Test the expression `ex` and calls the current handler to handle the result.
-"""
-:(Test.@test)
-
-doc"""
-    @test_approx_eq(a, b)
-
-Test two floating point numbers `a` and `b` for equality taking in account small numerical errors.
-"""
-:(Test.@test_approx_eq)
-
-doc"""
-    with_handler(f, handler)
-
-Run the function `f` using the `handler` as the handler.
-"""
-Test.with_handler
-
 # Base.Profile
 
 doc"""
diff --git a/base/test.jl b/base/test.jl
index a9172998fa513..e74e695c2601d 100644
--- a/base/test.jl
+++ b/base/test.jl
@@ -1,111 +1,605 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
+"""
+Simple unit testing functionality:
+
+* `@test`
+* `@test_throws`
+
+All tests belong to a *test set*. There is a default, task-level
+test set that throws on the first failure. Users can choose to wrap
+their tests in (possibly nested) test sets that will store results
+and summarize them at the end of the test set. See:
+
+* `@testset`
+* `@testloop`
+
+for more information.
+"""
 module Test
 
-export @test, @test_throws, @test_approx_eq, @test_approx_eq_eps, @inferred
+export @test, @test_throws
+export @testset, @testloop
+# Legacy approximate testing functions, yet to be included
+export @test_approx_eq, @test_approx_eq_eps, @inferred
 
+#-----------------------------------------------------------------------
+
+"""
+    Result
+
+All tests produce a result object. This object may or may not be
+'stored', depending on whether the test is part of a test set.
+"""
 abstract Result
-type Success <: Result
+
+"""
+    Pass
+
+The test condition was true, i.e. the expression evaluated to true or
+the correct exception was thrown.
+"""
+immutable Pass <: Result
+    test_type::Symbol
+    orig_expr
     expr
-    resultexpr
-    res
-    Success(expr, resultexpr=nothing, res=nothing) = new(expr, resultexpr, res)
+    value
+end
+function Base.show(io::IO, t::Pass)
+    print_with_color(:green, io, "Test Passed\n")
+    print(io, "  Expression: ", t.orig_expr)
+    if !isa(t.expr, Expr)
+        # Maybe just a constant, like true
+        print(io, "\n   Evaluated: ", t.expr)
+    elseif t.test_type == :test && t.expr.head == :comparison
+        # The test was an expression, so display the term-by-term
+        # evaluated version as well
+        print(io, "\n   Evaluated: ", t.expr)
+    elseif t.test_type == :test_throws
+        # The correct type of exception was thrown
+        print(io, "\n      Thrown: ", typeof(t.value))
+    end
 end
-type Failure <: Result
+
+"""
+    Fail
+
+The test condition was false, i.e. the expression evaluated to false or
+the correct exception was not thrown.
+"""
+type Fail <: Result
+    test_type::Symbol
+    orig_expr
     expr
-    resultexpr
+    value
+end
+function Base.show(io::IO, t::Fail)
+    print_with_color(:red, io, "Test Failed\n")
+    print(io, "  Expression: ", t.orig_expr)
+    if t.test_type == :test_throws
+        # Either no exception, or wrong exception
+        print(io, "\n    Expected: ", t.expr)
+        print(io, "\n      Thrown: ", typeof(t.value))
+    elseif !isa(t.expr, Expr)
+        # Maybe just a constant, like false
+        print(io, "\n   Evaluated: ", t.expr)
+    elseif t.test_type == :test && t.expr.head == :comparison
+        # The test was an expression, so display the term-by-term
+        # evaluated version as well
+        print(io, "\n   Evaluated: ", t.expr)
+    end
 end
-Failure(expr) = Failure(expr, nothing)
+
+"""
+    Error
+
+The test condition couldn't be evaluated due to an exception, or
+it evaluated to something other than a `Bool`.
+"""
 type Error <: Result
-    expr
-    err
+    test_type::Symbol
+    orig_expr
+    value
     backtrace
 end
-
-default_handler(r::Success) = r.res
-function default_handler(r::Failure)
-    if r.resultexpr !== nothing
-        error("test failed: $(r.resultexpr)\n in expression: $(r.expr)")
-    else
-        error("test failed in expression: $(r.expr)")
+function Base.show(io::IO, t::Error)
+    print_with_color(:red, io, "Error During Test\n")
+    if t.test_type == :test_nonbool
+        println(io, "  Expression evaluated to non-Boolean")
+        println(io, "  Expression: ", t.orig_expr)
+        print(  io, "       Value: ", t.value)
+    elseif t.test_type == :test_error
+        println(io, "  Test threw an exception of type ", typeof(t.value))
+        println(io, "  Expression: ", t.orig_expr)
+        # Capture error message and indent to match
+        errmsg = sprint(showerror, t.value, t.backtrace)
+        print(io, join(map(line->string("  ",line),
+                            split(errmsg, "\n")), "\n"))
     end
 end
-default_handler(r::Error) = rethrow(r)
 
-handler() = get(task_local_storage(), :TEST_HANDLER, default_handler)
 
-with_handler(f::Function, handler) =
-    task_local_storage(f, :TEST_HANDLER, handler)
+#-----------------------------------------------------------------------
 
-import Base.showerror
+# @test - check if the expression evaluates to true
+# In the special case of a comparison, e.g. x == 5, generate code to
+# evaluate each term in the comparison individually so the results
+# can be displayed nicely.
+"""
+    @test ex
 
-showerror(io::IO, r::Error) = showerror(io, r, [])
-function showerror(io::IO, r::Error, bt)
-    println(io, "test error in expression: $(r.expr)")
-    showerror(io, r.err, r.backtrace)
+Tests that the expression `ex` evaluates to `true`.
+Returns a `Pass` `Result` if it does, a `Fail` `Result` if it is
+`false`, and an `Error` `Result` if it could not be evaluated.
+"""
+macro test(ex)
+    # If the test is a comparison
+    if typeof(ex) == Expr && ex.head == :comparison
+        # Generate a temporary for every term in the expression
+        n = length(ex.args)
+        terms = [gensym() for i in 1:n]
+        # Create a new block that evaluates each term in the
+        # comparison individually
+        comp_block = Expr(:block)
+        comp_block.args = [:(
+                            $(terms[i]) = $(esc(ex.args[i]))
+                            ) for i in 1:n]
+        # The block should then evaluate whether the comparison
+        # evaluates to true by splicing in the new terms into the
+        # original comparison. The block returns
+        # - an expression with the values of terms spliced in
+        # - the result of the comparison itself
+        push!(comp_block.args, Expr(:return,
+            :(  Expr(:comparison, $(terms...)),  # Terms spliced in
+              $(Expr(:comparison,   terms...))   # Comparison itself
+            )))
+        # Return code that calls do_test with an anonymous function
+        # that calls the comparison block
+        :(do_test(()->($comp_block), $(Expr(:quote,ex))))
+    else
+        # Something else, perhaps just a single value
+        # Return code that calls do_test with an anonymous function
+        # that returns the expression and its value
+        :(do_test(()->($(Expr(:quote,ex)), $(esc(ex))), $(Expr(:quote,ex))))
+    end
 end
 
-function do_test(body,qex)
-    handler()(try
-        rex, val = body()
-        val ? Success(qex, rex) : Failure(qex,rex)
+# An internal function, called by the code generated by the @test
+# macro to actually perform the evaluation and manage the result.
+function do_test(predicate, orig_expr)
+    # get_testset() returns the most recently added test set
+    # We then call record() with this test set and the test result
+    record(get_testset(),
+    try
+        # expr, in the case of a comparison, will contain the
+        # comparison with evaluated values of each term spliced in.
+        # For anything else, just contains the test expression.
+        # value is the evaluated value of the whole test expression.
+        # Ideally it is true, but it may be false or non-Boolean.
+        expr, value = predicate()
+        if isa(value, Bool)
+            value ? Pass(:test, orig_expr, expr, value) :
+                    Fail(:test, orig_expr, expr, value)
+        else
+            # If the result is non-Boolean, this counts as an Error
+            Error(:test_nonbool, orig_expr, value, nothing)
+        end
     catch err
-        Error(qex,err,catch_backtrace())
+        # The predicate couldn't be evaluated without throwing an
+        # exception, so that is an Error and not a Fail
+        Error(:test_error, orig_expr, err, catch_backtrace())
     end)
 end
 
-function do_test_throws(body, qex, bt, extype)
-    handler()(try
-        body()
-        Failure(qex, "$qex did not throw $(extype === nothing ? "anything" : extype)")
+#-----------------------------------------------------------------------
+
+"""
+    @test_throws extype ex
+
+Tests that the expression `ex` throws an exception of type `extype`.
+"""
+macro test_throws(extype, ex)
+    :(do_test_throws( ()->($(esc(ex))), $(Expr(:quote,ex)),
+                      backtrace(), $(esc(extype)) ))
+end
+
+# An internal function, called by the code generated by @test_throws
+# to evaluate and catch the thrown exception - if it exists
+function do_test_throws(predicate, orig_expr, bt, extype)
+    record(get_testset(),
+    try
+        predicate()
+        # If we hit this line, no exception was thrown. We treat
+        # this as equivalent to the wrong exception being thrown.
+        Fail(:test_throws, orig_expr, extype, nothing)
     catch err
-        if extype === nothing
-            Base.warn("""
-            @test_throws without an exception type is deprecated;
-            Use `@test_throws $(typeof(err)) $(qex)` instead.
-            """, bt = bt)
-            Success(qex, nothing, err)
+        # Check the right type of exception was thrown
+        if isa(err, extype)
+            Pass(:test_throws, orig_expr, extype, err)
         else
-            if isa(err, extype)
-                Success(qex, nothing, err)
-            else
-                if isa(err,Type)
-                    Failure(qex, "the type $err was thrown instead of an instance of $extype")
-                else
-                    Failure(qex, "$err was thrown instead of $extype")
-                end
-            end
+            Fail(:test_throws, orig_expr, extype, err)
         end
     end)
 end
 
-macro test(ex)
-    if typeof(ex) == Expr && ex.head == :comparison
-        syms = [gensym() for i = 1:length(ex.args)]
-        func_block = Expr(:block)
-        # insert assignment into a block
-        func_block.args = [:($(syms[i]) = $(esc(ex.args[i]))) for i = 1:length(ex.args)]
-        # finish the block with a return
-        push!(func_block.args, Expr(:return, :(Expr(:comparison, $(syms...)), $(Expr(:comparison, syms...)))))
-        :(do_test(()->($func_block), $(Expr(:quote,ex))))
+#-----------------------------------------------------------------------
+
+# The AbstractTestSet interface is defined by two methods:
+# record(AbstractTestSet, Result)
+#   Called by do_test after a test is evaluated
+# finish(AbstractTestSet)
+#   Called after the test set has been popped from the test set stack
+abstract AbstractTestSet
+
+"""
+    TestSetException
+
+Thrown when a test set finishes and not all tests passed.
+"""
+type TestSetException <: Exception
+    pass::Int
+    fail::Int
+    error::Int
+end
+function Base.show(io::IO, ex::TestSetException)
+    print(io, "Some tests did not pass: ")
+    print(io, ex.pass,  " passed, ")
+    print(io, ex.fail,  " failed, ")
+    print(io, ex.error, " errored.")
+end
+
+#-----------------------------------------------------------------------
+
+"""
+    FallbackTestSet
+
+A simple fallback test set that throws immediately on a failure.
+"""
+immutable FallbackTestSet <: AbstractTestSet
+end
+fallback_testset = FallbackTestSet()
+
+# Records nothing, and throws an error immediately whenever a Fail or
+# Error occurs. Takes no action in the event of a Pass result
+record(ts::FallbackTestSet, t::Pass) = t
+function record(ts::FallbackTestSet, t::Union{Fail,Error})
+    println(t)
+    error("There was an error during testing")
+    t
+end
+# We don't need to do anything as we don't record anything
+finish(ts::FallbackTestSet) = nothing
+
+#-----------------------------------------------------------------------
+
+"""
+    DefaultTestSet
+
+If using the DefaultTestSet, the test results will be recorded. If there
+are any `Fail`s or `Error`s, an exception will be thrown only at the end,
+along with a summary of the test results.
+"""
+type DefaultTestSet <: AbstractTestSet
+    description::AbstractString
+    results::Vector
+    anynonpass::Bool
+end
+DefaultTestSet(desc) = DefaultTestSet(desc, [], false)
+
+# For a passing result, simply store the result
+record(ts::DefaultTestSet, t::Pass) = (push!(ts.results, t); t)
+# For the other result types, immediately print the error message
+# but do not terminate. Print a backtrace.
+function record(ts::DefaultTestSet, t::Union{Fail,Error})
+    print_with_color(:white, ts.description, ": ")
+    print(t)
+    Base.show_backtrace(STDOUT, backtrace())
+    println()
+    push!(ts.results, t)
+    t
+end
+
+# When a DefaultTestSet finishes, it records itself to its parent
+# testset, if there is one. This allows for recursive printing of
+# the results at the end of the tests
+record(ts::DefaultTestSet, t::AbstractTestSet) = push!(ts.results, t)
+
+# Called at the end of a @testset, behaviour depends on whether
+# this is a child of another testset, or the "root" testset
+function finish(ts::DefaultTestSet)
+    # If we are a nested test set, do not print a full summary
+    # now - let the parent test set do the printing
+    if get_testset_depth() != 0
+        # Attach this test set to the parent test set
+        parent_ts = get_testset()
+        record(parent_ts, ts)
+        return
+    end
+    # Calculate the overall number for each type so each of
+    # the test result types are aligned
+    passes, fails, errors, c_passes, c_fails, c_errors = get_test_counts(ts)
+    total_pass  = passes + c_passes
+    total_fail  = fails  + c_fails
+    total_error = errors + c_errors
+    dig_pass  = total_pass  > 0 ? ndigits(total_pass) : 0
+    dig_fail  = total_fail  > 0 ? ndigits(total_fail)  : 0
+    dig_error = total_error > 0 ? ndigits(total_error) : 0
+    total = total_pass + total_fail + total_error
+    dig_total = total > 0 ? ndigits(total) : 0
+    # For each category, take max of digits and header width if there are
+    # tests of that type
+    pass_width  = dig_pass  > 0 ? max(length("Pass"),  dig_pass)  : 0
+    fail_width  = dig_fail  > 0 ? max(length("Fail"),  dig_fail)  : 0
+    error_width = dig_error > 0 ? max(length("Error"), dig_error) : 0
+    total_width = dig_total > 0 ? max(length("Total"), dig_total) : 0
+    # Calculate the alignment of the test result counts by
+    # recursively walking the tree of test sets
+    align = max(get_alignment(ts, 0), length("Test Summary:"))
+    # Print the outer test set header once
+    print_with_color(:white, rpad("Test Summary:",align," "))
+    print(" | ")
+    if pass_width > 0
+        print_with_color(:green, lpad("Pass",pass_width," "))
+        print("  ")
+    end
+    if fail_width > 0
+        print_with_color(:red, lpad("Fail",fail_width," "))
+        print("  ")
+    end
+    if error_width > 0
+        print_with_color(:red, lpad("Error",error_width," "))
+        print("  ")
+    end
+    if total_width > 0
+        print_with_color(:blue, lpad("Total",total_width," "))
+    end
+    println()
+    # Recursively print a summary at every level
+    print_counts(ts, 0, align, pass_width, fail_width, error_width, total_width)
+    # Finally throw an error as we are the outermost test set
+    if total != total_pass
+        throw(TestSetException(total_pass,total_fail,total_error))
+    end
+end
+
+# Recursive function that finds the column that the result counts
+# can begin at by taking into account the width of the descriptions
+# and the amount of indentation. If a test set had no failures, and
+# no failures in child test sets, there is no need to include those
+# in calculating the alignment
+function get_alignment(ts::DefaultTestSet, depth::Int)
+    # The minimum width at this depth is...
+    ts_width = 2*depth + length(ts.description)
+    # If all passing, no need to look at children
+    !ts.anynonpass && return ts_width
+    # Return the maximum of this width and the minimum width
+    # for all children (if they exist)
+    length(ts.results) == 0 && return ts_width
+    child_widths = map(t->get_alignment(t, depth+1), ts.results)
+    return max(ts_width, maximum(child_widths))
+end
+get_alignment(ts, depth::Int) = 0
+
+# Recursive function that counts the number of test results of each
+# type directly in the testset, and totals across the child testsets
+function get_test_counts(ts::DefaultTestSet)
+    passes, fails, errors = 0, 0, 0
+    c_passes, c_fails, c_errors = 0, 0, 0
+    for t in ts.results
+        isa(t, Pass)  && (passes += 1)
+        isa(t, Fail)  && (fails  += 1)
+        isa(t, Error) && (errors += 1)
+        if isa(t, DefaultTestSet)
+            np, nf, ne, ncp, ncf, nce = get_test_counts(t)
+            c_passes += np + ncp
+            c_fails  += nf + ncf
+            c_errors += ne + nce
+        end
+    end
+    ts.anynonpass = (fails + errors + c_fails + c_errors > 0)
+    return passes, fails, errors, c_passes, c_fails, c_errors
+end
+
+# Recursive function that prints out the results at each level of
+# the tree of test sets
+function print_counts(ts::DefaultTestSet, depth, align,
+                        pass_width, fail_width, error_width, total_width)
+    # Count results by each type at this level, and recursively
+    # through any child test sets
+    passes, fails, errors, c_passes, c_fails, c_errors = get_test_counts(ts)
+    subtotal = passes + fails + errors + c_passes + c_fails + c_errors
+
+    # Print test set header, with an alignment that ensures all
+    # the test results appear above each other
+    print(rpad(string("  "^depth, ts.description), align, " "), " | ")
+
+    np = passes + c_passes
+    if np > 0
+        print_with_color(:green, lpad(string(np), pass_width, " "), "  ")
+    elseif pass_width > 0
+        # No passes at this level, but some at another level
+        print(" "^pass_width, "  ")
+    end
+
+    nf = fails + c_fails
+    if nf > 0
+        print_with_color(:red, lpad(string(nf), fail_width, " "), "  ")
+    elseif fail_width > 0
+        # No fails at this level, but some at another level
+        print(" "^fail_width, "  ")
+    end
+
+    ne = errors + c_errors
+    if ne > 0
+        print_with_color(:red, lpad(string(ne), error_width, " "), "  ")
+    elseif error_width > 0
+        # No errors at this level, but some at another level
+        print(" "^error_width, "  ")
+    end
+
+    if np == 0 && nf == 0 && ne == 0
+        print_with_color(:blue, "No tests")
     else
-        :(do_test(()->($(Expr(:quote,ex)), $(esc(ex))), $(Expr(:quote,ex))))
+        print_with_color(:blue, lpad(string(subtotal), total_width, " "))
+    end
+    println()
+
+    # Only print results at lower levels if we had failures
+    if np != subtotal
+        for t in ts.results
+            if isa(t, DefaultTestSet)
+                print_counts(t, depth + 1, align,
+                                pass_width, fail_width, error_width, total_width)
+            end
+        end
+    end
+end
+
+#-----------------------------------------------------------------------
+
+"""
+    @testset "description" begin ... end
+    @testset begin ... end
+
+Starts a new test set. The test results will be recorded, and if there
+are any `Fail`s or `Error`s, an exception will be thrown only at the end,
+along with a summary of the test results.
+"""
+macro testset(args...)
+    # Parse arguments to determine if any options were passed in
+    if length(args) == 2
+        # Looks like description format
+        desc, tests = args
+        !isa(desc, AbstractString) && error("Unexpected argument to @testset")
+    elseif length(args) == 1
+        # No description provided
+        desc, tests = "test set", args[1]
+    elseif length(args) >= 3
+        error("Too many arguments to @testset")
+    else
+        error("Too few arguments to @testset")
+    end
+    # Generate a block of code that initializes a new testset, adds
+    # it to the task local storage, evaluates the test(s), before
+    # finally removing the testset and giving it a chance to take
+    # action (such as reporting the results)
+    ts = gensym()
+    quote
+        $ts = DefaultTestSet($desc)
+        add_testset($ts)
+        $(esc(tests))
+        pop_testset()
+        finish($ts)
     end
 end
 
-macro test_throws(args...)
-    ex = nothing
-    extype = nothing
-    # Users should pass (ExceptionType, Expression) but we give a warning to users that only pass (Expression)
-    if length(args) == 1
-        ex = args[1]
-    elseif length(args) == 2
-        ex = args[2]
-        extype = args[1]
+
+"""
+    @testloop "description \$v" for v in (...) ... end
+    @testloop for x in (...), y in (...) ... end
+
+Starts a new test set for each iteration of the loop. The description
+string accepts interpolation from the loop indices. If no description
+is provided, one is constructed based on the variables.
+"""
+macro testloop(args...)
+    # Parse arguments to determine if any options were passed in
+    if length(args) == 2
+        # Looks like description format
+        desc, testloop = args
+        isa(desc,AbstractString) || (isa(desc,Expr) && desc.head == :string) || error("Unexpected argument to @testloop")
+        isa(testloop,Expr) && testloop.head == :for || error("Unexpected argument to @testloop")
+
+    elseif length(args) == 1
+        # No description provided
+        testloop = args[1]
+        isa(testloop,Expr) && testloop.head == :for || error("Unexpected argument to @testloop")
+        loopvars = testloop.args[1]
+        if loopvars.head == :(=)
+            # 1 variable
+            v = loopvars.args[1]
+            desc = Expr(:string,"$v = ",v)
+        else
+            # multiple variables
+            v = loopvars.args[1].args[1]
+            desc = Expr(:string,"$v = ",v) # first variable
+            for l = loopvars.args[2:end]
+                v = l.args[1]
+                push!(desc.args,", $v = ")
+                push!(desc.args,v)
+            end
+        end
+    elseif length(args) >= 3
+        error("Too many arguments to @testloop")
+    else
+        error("Too few arguments to @testloop")
+    end
+
+    # Uses a similar block as for `@testset`, except that it is
+    # wrapped in the outer loop provided by the user
+    ts = gensym()
+    tests = testloop.args[2]
+    blk = quote
+        $ts = DefaultTestSet($(esc(desc)))
+        add_testset($ts)
+        $(esc(tests))
+        pop_testset()
+        finish($ts)
     end
-    :(do_test_throws(()->($(esc(ex))),$(Expr(:quote,ex)),backtrace(),$(esc(extype))))
+    Expr(:for,esc(testloop.args[1]),blk)
+end
+
+#-----------------------------------------------------------------------
+# Various helper methods for test sets
+
+"""
+    get_testset()
+
+Retrieve the active test set from the task's local storage. If no
+test set is active, return the fallback test set.
+"""
+function get_testset()
+    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
+    return length(testsets) == 0 ? fallback_testset : testsets[end]
+end
+
+"""
+    add_testset(ts::AbstractTestSet)
+
+Adds the test set to the task_local_storage.
+"""
+function add_testset(ts::AbstractTestSet)
+    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
+    push!(testsets, ts)
+    setindex!(task_local_storage(), testsets, :__BASETESTNEXT__)
+end
+
+"""
+    pop_testset()
+
+Pops the last test set added to the task_local_storage. If there are no
+active test sets, returns the fallback test set.
+"""
+function pop_testset()
+    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
+    ret = length(testsets) == 0 ? fallback_testset : pop!(testsets)
+    setindex!(task_local_storage(), testsets, :__BASETESTNEXT__)
+    return ret
+end
+
+"""
+    get_testset_depth()
+
+Returns the number of active test sets, not including the fallback test set.
+"""
+function get_testset_depth()
+    testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
+    return length(testsets)
 end
 
+#-----------------------------------------------------------------------
+# Legacy approximate testing functions, yet to be included
+
 approx_full(x::AbstractArray) = x
 approx_full(x::Number) = x
 approx_full(x) = full(x)
@@ -145,10 +639,20 @@ array_eps(a) = eps(float(maximum(x->(isfinite(x) ? abs(x) : oftype(x,NaN)), a)))
 test_approx_eq(va, vb, astr, bstr) =
     test_approx_eq(va, vb, 1E4*length(va)*max(array_eps(va), array_eps(vb)), astr, bstr)
 
+"""
+    @test_approx_eq_eps(a, b, tol)
+
+Test two floating point numbers `a` and `b` for equality taking into account a margin of tolerance given by `tol`.
+"""
 macro test_approx_eq_eps(a, b, c)
     :(test_approx_eq($(esc(a)), $(esc(b)), $(esc(c)), $(string(a)), $(string(b))))
 end
 
+"""
+    @test_approx_eq(a, b)
+
+Test two floating point numbers `a` and `b` for equality taking into account small numerical errors.
+"""
 macro test_approx_eq(a, b)
     :(test_approx_eq($(esc(a)), $(esc(b)), $(string(a)), $(string(b))))
 end
diff --git a/test/test.jl b/test/test.jl
index bbd8adde09ed0..bfd2aba90dcfc 100644
--- a/test/test.jl
+++ b/test/test.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: http://julialang.org/license
 
+using Base.Test
+
 # test file to test testing
 
 # Test @test
@@ -9,9 +11,6 @@
 @test strip("\t  hi   \n") == "hi"
 @test strip("\t  this should fail   \n") != "hi"
 
-scary = Base.Test.Error("hi",DimensionMismatch,[])
-@test sprint(showerror,scary) == "test error in expression: hi\nDimensionMismatch"
-
 a = Array(Float64, 2, 2, 2, 2, 2)
 a[1,1,1,1,1] = 10
 @test a[1,1,1,1,1] == 10
@@ -19,74 +18,79 @@ a[1,1,1,1,1] = 10
 
 @test rand() != rand()
 
+sprint(show, @test true)
+sprint(show, @test 10 == 2*5)
+sprint(show, @test !false)
 
-# Test with_handler
-successflag = false
-failureflag = false
-errorflag = false
-test_handler(r::Test.Success) = !successflag
-test_handler(r::Test.Failure) = !failureflag
-test_handler(r::Test.Error) = !errorflag
+OLD_STDOUT = STDOUT
+catch_out = IOStream("")
+rd, wr = redirect_stdout()
 
-Test.with_handler(test_handler) do
+@testset "no errors" begin
     @test true
-    @test successflag
-    @test !failureflag
-    @test !errorflag
-    successflag = false
-    @test false
-    @test !successflag
-    @test failureflag
-    @test !errorflag
-    failureflag = false
-    @test error("throw error")
-    @test !successflag
-    @test !failureflag
-    @test errorflag
-end
-
-# Test evaluation of comparison tests
-i7586_1() = 1
-i7586_2() = 7
-i7586_3() = 9
-
-comparison_flags_s = [false,false,false]
-comparison_flags_f = [false,false,false]
-function test_handler2(r::Test.Success)
-    comparison_flags_s[1] = (r.resultexpr.args[1] == 1)
-    comparison_flags_s[2] = (r.resultexpr.args[3] == 7)
-    comparison_flags_s[3] = (r.resultexpr.args[5] == 9)
+    @test 1 == 1
 end
 
-function test_handler2(r::Test.Failure)
-    comparison_flags_f[1] = (r.resultexpr.args[1] == 1)
-    comparison_flags_f[2] = (r.resultexpr.args[3] == 7)
-    comparison_flags_f[3] = (r.resultexpr.args[5] == 10)
+try
+
+@testset "outer" begin
+    @testset "inner1" begin
+        @test true
+        @test false
+        @test 1 == 1
+        @test 2 == :foo
+        @test 3 == 3
+        @testset "d" begin
+            @test 4 == 4
+        end
+        @testset begin
+            @test :blank != :notblank
+        end
+    end
+    @testset "inner1" begin
+        @test 1 == 1
+        @test 2 == 2
+        @test 3 == :bar
+        @test 4 == 4
+        @test_throws ErrorException 1+1
+        @test_throws ErrorException error()
+        @testset "errrrr" begin
+            @test "not bool"
+            @test error()
+        end
+    end
+
+    @testset "loop with desc" begin
+        @testloop "loop1 $T" for T in (Float32, Float64)
+            @test 1 == T(1)
+        end
+    end
+    @testset "loops without desc" begin
+        @testloop for T in (Float32, Float64)
+            @test 1 == T(1)
+        end
+        @testloop for T in (Float32, Float64), S in (Int32,Int64)
+            @test S(1) == T(1)
+        end
+    end
+    srand(123)
+    @testset "some loops fail" begin
+        @testloop for i in 1:5
+            @test i <= rand(1:10)
+        end
+    end
 end
-
-Test.with_handler(test_handler2) do
-    @test i7586_1() <= i7586_2() <= i7586_3()
-    @test i7586_1() >= i7586_2() >= 10
+    # These lines shouldn't be called
+    redirect_stdout(OLD_STDOUT)
+    error("No exception was thrown!")
+catch ex
+    redirect_stdout(OLD_STDOUT)
+
+    @test isa(ex, Test.TestSetException)
+    @test ex.pass  == 21
+    @test ex.fail  == 5
+    @test ex.error == 2
 end
-@test all(comparison_flags_s)
-@test all(comparison_flags_f)
-
-# Test @test_throws
-domainerror_thrower() = throw(DomainError())
-boundserror_thrower() = throw(BoundsError())
-error_thrower() = error("An error happened")
-@test_throws DomainError domainerror_thrower()
-@test_throws BoundsError boundserror_thrower()
-
-failureflag = false
-successflag = false
-Test.with_handler(test_handler) do
-    @test_throws DomainError boundserror_thrower()
-    @test failureflag
-    @test_throws DomainError domainerror_thrower()
-    @test successflag
-end
-
 
 # Test @test_approx_eq
 # TODO
@@ -97,4 +101,4 @@ end
 @test_throws ErrorException Test.test_approx_eq(ones(10),zeros(10),1e-8,"a","b")
 
 # Test @test_approx_eq_eps
-# TODO
+# TODO
\ No newline at end of file

From e385bbcbd7483751e47c74d9dc99d83f939fbd55 Mon Sep 17 00:00:00 2001
From: Iain Dunning <iaindunning@gmail.com>
Date: Thu, 17 Sep 2015 23:45:24 -0400
Subject: [PATCH 2/2] New manual for tests

---
 doc/stdlib/test.rst | 279 +++++++++++++++++++++++++++-----------------
 1 file changed, 171 insertions(+), 108 deletions(-)

diff --git a/doc/stdlib/test.rst b/doc/stdlib/test.rst
index cfd9bf56358da..142d2efd3e2cd 100644
--- a/doc/stdlib/test.rst
+++ b/doc/stdlib/test.rst
@@ -1,6 +1,6 @@
-*****************************
- Unit and Functional Testing
-*****************************
+**************
+ Unit Testing
+**************
 
 Testing Base Julia
 ------------------
@@ -20,64 +20,185 @@ binary install, you can run the test suite using ``Base.runtests()``.
 
 .. module:: Base.Test
 
-Test Framework
---------------
+Basic Unit Tests
+----------------
 
-The ``Test`` module contains macros and functions related to testing.
-A default handler is provided to run the tests, and a custom one can be
-provided by the user by using the :func:`registerhandler` function.
+The ``Base.Test`` module provides simple *unit testing* functionality.
+Unit testing is a way to see if your code is correct by checking that
+the results are what you expect. It can be helpful to ensure your code
+still works after you make changes, and can be used when developing as
+a way of specifying the behaviors your code should have when complete.
 
-To use the default handler, the macro :func:`@test` can be used directly::
+Simple unit testing can be performed with the :func:`@test` and
+:func:`@test_throws` macros:
 
-  julia> using Base.Test
+.. function:: @test ex
 
-  julia> @test 1 == 1
+   .. Docstring generated from Julia source
 
-  julia> @test 1 == 0
-  ERROR: test failed: 1 == 0
-   in error at error.jl:21
-   in default_handler at test.jl:19
-   in do_test at test.jl:39
+   Tests that the expression ``ex`` evaluates to ``true``\ . Returns a ``Pass`` ``Result`` if it does, a ``Fail`` ``Result`` if it is ``false``\ , and an ``Error`` ``Result`` if it could not be evaluated.
 
-  julia> @test error("This is what happens when a test fails")
-  ERROR: test error during error("This is what happens when a test fails")
-  This is what happens when a test fails
-   in error at error.jl:21
-   in anonymous at test.jl:62
-   in do_test at test.jl:37
+.. function:: @test_throws extype ex
 
-As seen in the examples above, failures or errors will print the abstract
-syntax tree of the expression in question.
+   .. Docstring generated from Julia source
 
-Another macro is provided to check if the given expression throws an exception of type ``extype``,
-:func:`@test_throws`::
+   Tests that the expression ``ex`` throws an exception of type ``extype``\ .
 
-  julia> @test_throws ErrorException error("An error")
-  ErrorException("An error")
+For example, suppose we want to check our new function ``foo(x)`` works
+as expected::
 
-  julia> @test_throws BoundsError error("An error")
-  ERROR: test failed: error("An error")
-   in error at error.jl:21
-   in default_handler at test.jl:19
-   in do_test_throws at test.jl:55
+    julia> using Base.Test
 
-  julia> @test_throws DomainError throw(DomainError())
-  DomainError()
+    julia> foo(x) = length(x)^2
+    foo (generic function with 1 method)
 
-  julia> @test_throws DomainError throw(EOFError())
-  ERROR: test failed: throw(EOFError())
-   in error at error.jl:21
-   in default_handler at test.jl:19
-   in do_test_throws at test.jl:55
+If the condition is true, a ``Pass`` is returned::
+
+    julia> @test foo("bar") == 9
+    Test Passed
+      Expression: foo("bar") == 9
+       Evaluated: 9 == 9
+
+    julia> @test foo("fizz") >= 10
+    Test Passed
+      Expression: foo("fizz") >= 10
+       Evaluated: 16 >= 10
+
+If the condition is false, then a ``Fail`` is returned and an
+exception is thrown::
+
+    julia> @test foo("f") == 20
+    Test Failed
+      Expression: foo("f") == 20
+       Evaluated: 1 == 20
+    ERROR: There was an error during testing
+     in record at test.jl:268
+     in do_test at test.jl:191
+
+If the condition could not be evaluated because an exception was thrown,
+which occurs in this case because :func:`length` is not defined for
+symbols, an ``Error`` object is returned and an exception is thrown::
+
+    julia> @test foo(:cat) == 1
+    Error During Test
+      Test threw an exception of type MethodError
+      Expression: foo(:cat) == 1
+      MethodError: `length` has no method matching length(::Symbol)
+       in foo at none:1
+       in anonymous at test.jl:159
+       in do_test at test.jl:180
+    ERROR: There was an error during testing
+     in record at test.jl:268
+     in do_test at test.jl:191
+
+If we expect that evaluating an expression *should* throw an exception,
+then we can use :func:`@test_throws` to check this occurs::
 
+    julia> @test_throws MethodError foo(:cat)
+    Test Passed
+      Expression: foo(:cat)
+       Evaluated: MethodError
+
+
+Working with Test Sets
+----------------------
+
+Typically a large number of tests are used to make sure functions
+work correctly over a range of inputs. In the event a test fails, the
+default behavior is to throw an exception immediately. However, it is
+normally preferable to run the rest of the tests first to get a
+better picture of how many errors there are in the code being tested.
+
+The :func:`@testset` and :func:`@testloop` macros can be used to
+group tests into *sets*. All the tests in a test set will be run,
+and at the end of the test set a summary will be printed. If any of
+the tests failed, or could not be evaluated due to an error, the
+test set will then throw a ``TestSetException``.
+
+
+.. function:: @testset "description" begin ... end
+              @testset begin ... end
+
+   .. Docstring generated from Julia source
+
+   Starts a new test set. The test results will be recorded, and if there are any ``Fail``\ s or ``Error``\ s, an exception will be thrown only at the end, along with a summary of the test results.
+
+.. function:: @testloop "description $v" for v in (...) ... end
+              @testloop for x in (...), y in (...) ... end
+
+   .. Docstring generated from Julia source
 
-As floating-point values can be imprecise, you can perform approximate
-equality checks using either ``@test a ≈ b`` (where ``≈``, typed via
-tab completion of ``\approx``, is the ``isapprox`` function) or use
-the macros ``@test_approx_eq`` macro (which differs from ``isapprox``
-in that it treats NaN values as equal and has a smaller default
-tolerance) or ``@test_approx_eq_eps`` (which takes an extra argument
-indicating the relative tolerance)::
+   Starts a new test set for each iteration of the loop. The description string accepts interpolation from the loop indices. If no description is provided, one is constructed based on the variables.
+
+We can put our tests for the ``foo(x)`` function in a test set::
+
+    julia> @testset "Foo Tests" begin
+               @test foo("a")   == 1
+               @test foo("ab")  == 4
+               @test foo("abc") == 9
+           end
+    Test Summary: | Pass  Total
+    Foo Tests     |    3      3
+
+Test sets can also be nested::
+
+    julia> @testset "Foo Tests" begin
+               @testset "Animals" begin
+                   @test foo("cat") == 9
+                   @test foo("dog") == foo("cat")
+               end
+               @testloop "Arrays $i" for i in 1:3
+                   @test foo(zeros(i)) == i^2
+                   @test foo(ones(i)) == i^2
+               end
+           end
+    Test Summary: | Pass  Total
+    Foo Tests     |    8      8
+
+In the event that a nested test set has no failures, as happened here,
+it will be hidden in the summary. If we do have a test failure, only
+the details for the failed test sets will be shown::
+
+    julia> @testset "Foo Tests" begin
+               @testset "Animals" begin
+                   @testset "Felines" begin
+                       @test foo("cat") == 9
+                   end
+                   @testset "Canines" begin
+                       @test foo("dog") == 9
+                   end
+               end
+               @testset "Arrays" begin
+                   @test foo(zeros(2)) == 4
+                   @test foo(ones(4)) == 15
+               end
+           end
+
+    Arrays: Test Failed
+      Expression: foo(ones(4)) == 15
+       Evaluated: 16 == 15
+     in record at test.jl:297
+     in do_test at test.jl:191
+    Test Summary: | Pass  Fail  Total
+    Foo Tests     |    3     1      4
+      Animals     |    2            2
+      Arrays      |    1     1      2
+    ERROR: Some tests did not pass: 3 passed, 1 failed, 0 errored.
+     in finish at test.jl:362
+
+
+Other Test Macros
+-----------------
+
+As calculations on floating-point values can be imprecise, you can
+perform approximate equality checks using either ``@test a ≈ b``
+(where ``≈``, typed via tab completion of ``\approx``,
+is the :func:`isapprox` function) or :func:`isapprox` directly.
+
+An alternative is the ``@test_approx_eq`` macro (which differs from
+``isapprox`` in that it treats NaN values as equal and has a smaller
+default tolerance) or ``@test_approx_eq_eps`` (which takes an extra
+argument indicating the relative tolerance)::
 
   julia> @test 1 ≈ 0.999999999
 
@@ -107,58 +228,9 @@ indicating the relative tolerance)::
    in error at error.jl:22
    in test_approx_eq at test.jl:68
 
-Handlers
---------
-
-A handler is a function defined for three kinds of arguments: ``Success``, ``Failure``, ``Error``::
-
-  # An example definition of a test handler
-  test_handler(r::Success) = nothing
-  test_handler(r::Failure) = error("test failed: $(r.expr)")
-  test_handler(r::Error)   = rethrow(r)
-
-A different handler can be used for a block (with :func:`with_handler`)::
-
-  julia> using Base.Test
-
-  julia> custom_handler(r::Test.Success) = println("Success on $(r.expr)")
-  custom_handler (generic function with 1 method)
-
-  julia> custom_handler(r::Test.Failure) = error("Error on custom handler: $(r.expr)")
-  custom_handler (generic function with 2 methods)
-
-  julia> custom_handler(r::Test.Error) = rethrow(r)
-  custom_handler (generic function with 3 methods)
-
-  julia> Test.with_handler(custom_handler) do
-           @test 1 == 1
-           @test 1 != 1
-         end
-  Success on :((1==1))
-  ERROR: Error on custom handler: :((1!=1))
-   in error at error.jl:21
-   in custom_handler at none:1
-   in do_test at test.jl:39
-   in anonymous at no file:3
-   in task_local_storage at task.jl:28
-   in with_handler at test.jl:24
-
-The ``Success`` and ``Failure`` types include an additonal field, ``resultexpr``, which is a partially evaluated expression. For example, in a comparison it will contain an expression with the left and right sides evaluated.
-
-Macros
-------
-
-.. function:: @test(ex)
-
-   .. Docstring generated from Julia source
-
-   Test the expression ``ex`` and calls the current handler to handle the result.
-
-.. function:: @test_throws(extype, ex)
-
-   .. Docstring generated from Julia source
-
-   Test that the expression ``ex`` throws an exception of type ``extype`` and calls the current handler to handle the result.
+Note that these macros will fail immediately, and are not compatible
+with :func:`@testset`, so using ``@test isapprox`` is encouraged when
+writing new tests.
 
 .. function:: @test_approx_eq(a, b)
 
@@ -172,12 +244,3 @@ Macros
 
    Test two floating point numbers ``a`` and ``b`` for equality taking in account a margin of tolerance given by ``tol``\ .
 
-Functions
----------
-
-.. function:: with_handler(f, handler)
-
-   .. Docstring generated from Julia source
-
-   Run the function ``f`` using the ``handler`` as the handler.
-