Speeding up subsolvers #391

Merged: 47 commits, merged on Nov 7, 2022
Changes from 46 commits

Commits (47)
3be56a1
write some hard-coded values as function arguments in _tranquilo()
mpetrosian Oct 11, 2022
e64a92c
polish documentation
mpetrosian Oct 12, 2022
209654a
render take_preliminary_gradient_descent_step_and_check_for_solution …
mpetrosian Oct 19, 2022
f981185
fix a bug in find_hessian_inactive_bounds
mpetrosian Oct 19, 2022
05c558b
Add fast bntr as a new subsolver. Add tests
mpetrosian Oct 19, 2022
bad51d3
add test for prelim grad descent
mpetrosian Oct 19, 2022
d189bf7
add bntr_fast to test set of subsolvers
mpetrosian Oct 19, 2022
fa9f90b
Rename some functions in bntr_fast.
mpetrosian Oct 20, 2022
e68e9ee
jit trs_box_quadratic
mpetrosian Oct 24, 2022
cfaf86d
add more tests for function in trs_box
mpetrosian Oct 24, 2022
1ed1418
Set default radius to float to get minimize_trs_box to run.
mpetrosian Oct 25, 2022
bf691b3
Add more tests for minimize_trust_region.
mpetrosian Oct 25, 2022
1e160c4
Add more tests.
mpetrosian Oct 25, 2022
7c4c6ce
Add more tests.
mpetrosian Oct 25, 2022
91c2628
Relax test for trust region minimization
mpetrosian Oct 25, 2022
6390e75
Add test for _minimize_bntr for test coverage
mpetrosian Oct 26, 2022
bca6f2d
Add pickled model for bntr test
mpetrosian Oct 26, 2022
183aaa4
Relax checking for criterion equality in bntr test to 10 digits
mpetrosian Oct 26, 2022
1fb3254
fix a test. Remove gqtpar_fast from under version control.
mpetrosian Oct 26, 2022
6128cc3
use array_almost_equal to get windows test pass
mpetrosian Oct 26, 2022
6719add
fix a test
mpetrosian Oct 26, 2022
ecdaf16
Add numba versions of most gqtpar functions
mpetrosian Oct 30, 2022
da860cb
Add new test for find_new_candidate_and_update_params.
mpetrosian Oct 30, 2022
761deea
Jit check_for_interior_convergene, add tests
mpetrosian Oct 30, 2022
316a0dc
Jit update_lambdas_when_factorization_unsuccessful, add tests
mpetrosian Oct 30, 2022
57f447f
Get rid of namedtuples
mpetrosian Oct 30, 2022
edb0ea6
Add fast gqtpar to quadratic subsolvers
mpetrosian Nov 1, 2022
4bec07f
Remove files related to gqtpar_fast
mpetrosian Nov 2, 2022
5b07c47
Hadn't saved a change
mpetrosian Nov 2, 2022
4f60a34
Relax a test
mpetrosian Nov 2, 2022
c2c9ded
Resolve merge conflicts
mpetrosian Nov 2, 2022
1a67735
Fix tests in bntr_fast
mpetrosian Nov 2, 2022
6f579e9
Working on test coverage. Remove is True statements
mpetrosian Nov 2, 2022
b84c6a3
in bntr_quadratic, generate initial x_candidate inside take_prelimina…
mpetrosian Nov 2, 2022
e412b12
Add new test case for minimize_bntr_fast. Improve docstrings
mpetrosian Nov 2, 2022
90d4792
Add a test for a function in _trs_box_fast
mpetrosian Nov 2, 2022
fee3309
Add test for more functions in _trsbox_quadratic_fast
mpetrosian Nov 2, 2022
b5b9e52
Exclude fast subsolvers from code coverage report
mpetrosian Nov 2, 2022
a945364
Add docstrings in bntr_fast
mpetrosian Nov 2, 2022
4e626a2
update paths in codecov
mpetrosian Nov 2, 2022
2dc98ae
Update docstrings
mpetrosian Nov 4, 2022
3b92b2a
Merge branch 'main' into tranquilo-subsolver-speedup
janosg Nov 6, 2022
33eb09b
Add dtypes explicitly to active/inactive bounds indices.
mpetrosian Nov 6, 2022
c186422
Merge branch 'tranquilo-subsolver-speedup' of https://github.com/Open…
mpetrosian Nov 6, 2022
369a256
Return information on active bounds as boolean arrays
mpetrosian Nov 7, 2022
b3ffd86
Initiate direction in get fischer burmeister vector as an array inst…
mpetrosian Nov 7, 2022
c9c23d2
Incorporate pr comments
mpetrosian Nov 7, 2022
4 changes: 4 additions & 0 deletions codecov.yml
@@ -19,3 +19,7 @@ ignore:
- "release.py"
- "setup.py"
- "estimagic/tests/**/*"
- "src/estimagic/optimization/subsolvers/bounded_newton_quadratic_fast.py"
- "src/estimagic/optimization/subsolvers/_trsbox_quadratic_fast.py"
- "src/estimagic/optimization/subsolvers/_conjugate_gradient_quadratic_fast.py"
- "src/estimagic/optimization/subsolvers/_steinhaug_toint_quadratic_fast.py"
135 changes: 135 additions & 0 deletions src/estimagic/optimization/subsolvers/_conjugate_gradient_quadratic_fast.py
@@ -0,0 +1,135 @@
"""Implementation of the Conjugate Gradient algorithm."""
import math

import numpy as np
from numba import njit


@njit
def minimize_trust_cg_fast(
model_gradient, model_hessian, trustregion_radius, gtol_abs, gtol_rel
):
"""Minimize the quadratic subproblem via (standard) conjugate gradient.

Solve the trust-region quadratic subproblem:
min_x g.T @ x + 0.5 * x.T @ H @ x
s.t. ||x|| <= trustregion_radius

approximately, where g denotes the gradient and H the hessian of the quadratic
model (i.e. the linear terms and square_terms), respectively.

Args:
model_gradient (np.ndarray): 1d array of shape (n,) containing the
gradient (i.e. linear terms) of the quadratic model.
model_hessian (np.ndarray): 2d array of shape (n, n) containing the
hessian (i.e. square terms) of the quadratic model.
trustregion_radius (float): Radius of the trust-region.
gtol_abs (float): Convergence tolerance for the absolute gradient norm.
gtol_rel (float): Convergence tolerance for the relative gradient norm.

Returns:
np.ndarray: Solution vector of shape (n,).
"""
n = len(model_gradient)
max_iter = n * 2
x_candidate = np.zeros(n)

residual = model_gradient
direction = -model_gradient

gradient_norm = np.linalg.norm(residual)
stop_tol = max(gtol_abs, gtol_rel * gradient_norm)

for _ in range(max_iter):

if gradient_norm <= stop_tol:
break

square_terms = direction.T @ model_hessian @ direction

distance_to_boundary = _get_distance_to_trustregion_boundary(
x_candidate, direction, trustregion_radius
)

step_size = (residual @ residual) / square_terms

if square_terms <= 0 or step_size > distance_to_boundary:
x_candidate = x_candidate + distance_to_boundary * direction
break

x_candidate, residual, direction = _update_vectors_for_next_iteration(
x_candidate, residual, direction, model_hessian, step_size
)
gradient_norm = np.linalg.norm(residual)

return x_candidate


@njit
def _update_vectors_for_next_iteration(
x_candidate, residual, direction, hessian, alpha
):
"""Update candidate, residual, and direction vectors for the next iteration.

Args:
x_candidate (np.ndarray): Candidate vector of shape (n,).
residual (np.ndarray): Array of residuals of shape (n,). The residual vector
is defined as `r = Ax - b`, where `A` denotes the hessian matrix and `b` the
gradient vector of the quadratic trust-region subproblem.
`r` is equivalent to the first derivative of the quadratic subproblem.
direction (np.ndarray): Direction vector of shape (n,).
hessian (np.ndarray): 2d array of shape (n, n) containing the hessian of the quadratic model.
alpha (float): Step size taken along the current direction.

Returns:
(tuple) Tuple containing:
- x_candidate (np.ndarray): Updated candidate vector of shape (n,).
- residual (np.ndarray): Updated array of residuals of shape (n,).
- direction (np.ndarray): Updated direction vector of shape (n,).
"""
residual_new = np.zeros(len(residual))
nom = 0
denom = 0
for i in range(len(x_candidate)):
x_candidate[i] = x_candidate[i] + alpha * direction[i]
temp = 0
for j in range(len(x_candidate)):
temp += hessian[i, j] * direction[j]
residual_new[i] = temp * alpha + residual[i]

nom += residual_new[i] * residual_new[i]
denom += residual[i] * residual[i]
beta = nom / denom
direction = -residual_new + beta * direction

return x_candidate, residual_new, direction


@njit
def _get_distance_to_trustregion_boundary(candidate, direction, radius):
"""Compute the distance of the candidate vector to trustregion boundary.

The positive distance sigma is defined in the Euclidean norm as follows:

|| x + sigma * d || = radius

where x denotes the candidate vector, and d the direction vector.

Args:
candidate (np.ndarray): Candidate vector of shape (n,).
direction (np.ndarray): Direction vector of shape (n,).
radius (float): Radius of the trust-region.

Returns:
float: The candidate vector's distance to the trustregion
boundary.
"""
cc = 0
cd = 0
dd = 0
for i in range(len(direction)):
cc += candidate[i] ** 2
dd += direction[i] ** 2
cd += candidate[i] * direction[i]
sigma = -cd + math.sqrt(cd * cd + dd * (radius**2 - cc))
sigma = sigma / dd

return sigma
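Reviewer note (not part of the diff): a minimal usage sketch of the new jitted CG subsolver. The import path is assumed from the module path added to codecov.yml above, and the gradient, hessian, radius, and tolerances below are purely illustrative toy values.

import numpy as np
from estimagic.optimization.subsolvers._conjugate_gradient_quadratic_fast import (
    minimize_trust_cg_fast,
)

# Quadratic model g.T @ x + 0.5 * x.T @ H @ x with a positive definite H,
# chosen so the unconstrained minimizer -H^{-1} g lies inside the trust region.
model_gradient = np.array([1.0, -2.0, 0.5])
model_hessian = np.diag(np.array([4.0, 3.0, 2.0]))

x = minimize_trust_cg_fast(model_gradient, model_hessian, 1.0, 1e-8, 1e-6)

# For an interior solution, CG should recover the unconstrained minimizer
# and the iterate must stay within the trust-region radius.
assert np.allclose(x, -np.linalg.solve(model_hessian, model_gradient))
assert np.linalg.norm(x) <= 1.0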
204 changes: 204 additions & 0 deletions src/estimagic/optimization/subsolvers/_steinhaug_toint_quadratic_fast.py
@@ -0,0 +1,204 @@
"""Implementation of the Steihaug-Toint Conjugate Gradient algorithm."""
import numpy as np
from numba import njit


@njit
def minimize_trust_stcg_fast(model_gradient, model_hessian, trustregion_radius):
"""Minimize the quadratic subproblem via Steihaug-Toint conjugate gradient.

Solve the quadratic trust-region subproblem:

min_x g.T @ x + 0.5 * x.T @ hess @ x
s.t. ||x|| <= trustregion_radius

approximately, where g denotes the gradient and hess the hessian of the quadratic
model (i.e. the linear terms and square_terms), respectively.

The Steihaug-Toint conjugate gradient method is based on Steihaug
(:cite:`Steihaug1983`) and Toint (:cite:`Toint1981`).

Args:
model_gradient (np.ndarray): 1d array of shape (n,) containing the
gradient (i.e. linear terms) of the quadratic model.
model_hessian (np.ndarray): 2d array of shape (n, n) containing the
hessian (i.e. square terms) of the quadratic model.
trustregion_radius (float): Radius of the trust-region.

Returns:
np.ndarray: Solution vector of shape (n,).
"""
abstol = 1e-50
rtol = 1e-5
divtol = 10_000

n = len(model_gradient)
radius_sq = trustregion_radius**2

residual = -model_gradient
rr = residual.T @ residual

x_candidate = np.zeros(n)

max_iter = min(n, 10_000)

z = np.linalg.pinv(model_hessian) @ residual
rz = residual @ residual

n_iter = 0
diverged = False
converged = False

norm_r = np.sqrt(rr)
norm_r0 = norm_r
if rtol * norm_r0 >= abstol:
ttol = rtol * norm_r0
else:
ttol = abstol

converged, diverged = _check_convergence(
norm_r, norm_r0, abstol, ttol, divtol, converged, diverged
)

p = model_hessian @ z
z = model_hessian @ p
n_iter += 1

kappa = p @ z

dp = 0
norm_d = 0
norm_p = p @ p

if kappa <= 0:
converged = True

x_candidate, z, n_iter = _update_candidate_vector_and_iteration_number(
x_candidate,
residual,
p,
z,
model_gradient,
model_hessian,
rr,
trustregion_radius,
norm_p,
n_iter,
)

for _ in range(max_iter):
alpha = rz / kappa
norm_dp1 = norm_d + alpha * (2 * dp + alpha * norm_p)

if trustregion_radius != 0 and norm_dp1 >= radius_sq:
converged = True

if norm_p > 0:
x_candidate = _take_step_to_trustregion_boundary(
x_candidate, p, dp, radius_sq, norm_d, norm_p
)

break

x_candidate = x_candidate + alpha * p
residual = residual - alpha * (model_hessian @ p)

norm_d = x_candidate @ x_candidate

rzm1 = rz
rz = residual @ residual

norm_r = np.linalg.norm(residual)

converged, diverged = _check_convergence(
norm_r, norm_r0, abstol, ttol, divtol, converged, diverged
)

if converged or diverged:
break

beta = rz / rzm1

if abs(beta) <= 0:
diverged = True
break

if n_iter >= max_iter:
diverged = True
break

p = residual + beta * p

dp = x_candidate @ p
norm_p = p @ p

z = model_hessian @ p
kappa = p @ z
n_iter += 1

if kappa <= 0:
converged = True

if trustregion_radius != 0 and norm_p > 0:
x_candidate = _take_step_to_trustregion_boundary(
x_candidate, p, dp, radius_sq, norm_d, norm_p
)

break

return x_candidate


@njit
def _update_candidate_vector_and_iteration_number(
x_candidate,
residual,
p,
z,
model_gradient,
model_hessian,
rr,
radius,
norm_p,
n_iter,
):
"""Update candidate, z vector, and iteration number."""
radius_sq = radius**2

if radius != 0 and norm_p > 0:
# Take step to boundary
step = np.sqrt(radius_sq / norm_p)
x_candidate = x_candidate + step * p

elif radius != 0:
if radius_sq >= rr:
alpha = 1.0
else:
alpha = np.sqrt(radius_sq / rr)

x_candidate = x_candidate + alpha * residual
z = model_gradient - 0.5 * (model_hessian @ x_candidate)

n_iter += 1

return x_candidate, z, n_iter


@njit
def _take_step_to_trustregion_boundary(x_candidate, p, dp, radius_sq, norm_d, norm_p):
"""Take step to trust-region boundary."""
step = (np.sqrt(dp * dp + norm_p * (radius_sq - norm_d)) - dp) / norm_p
x_candidate = x_candidate + step * p

return x_candidate


@njit
def _check_convergence(rnorm, rnorm0, abstol, ttol, divtol, converged, diverged):
"""Check for convergence."""
if rnorm <= ttol:
converged = True
elif rnorm >= divtol * rnorm0:
diverged = True

return converged, diverged
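Reviewer note (not part of the diff): a hedged usage sketch for the Steihaug-Toint variant, assuming the module path matches the entry added to codecov.yml above. The toy model is illustrative; its unconstrained minimizer lies outside the trust region, so the solver should stop on the boundary along the steepest-descent direction.

import numpy as np
from estimagic.optimization.subsolvers._steinhaug_toint_quadratic_fast import (
    minimize_trust_stcg_fast,
)

# Toy model: identity hessian, so the unconstrained minimizer is -g = [-2, 0].
# With radius 0.5 the solution is clipped to the boundary at [-0.5, 0].
model_gradient = np.array([2.0, 0.0])
model_hessian = np.eye(2)

x = minimize_trust_stcg_fast(model_gradient, model_hessian, 0.5)

assert np.allclose(x, np.array([-0.5, 0.0]))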
19 changes: 14 additions & 5 deletions src/estimagic/optimization/subsolvers/_trsbox_quadratic.py
@@ -570,17 +570,26 @@ def _update_candidate_vectors_and_reduction_alt_step(
If the angle of the alternative iteration is restricted by a bound on a
free variable, that variable is fixed at the bound.
"""
- gradient_candidate += (cosine - 1.0) * hessian_reduced + sine * hess_s
- x_candidate[x_bounded == 0] = (
+ gradient_candidate_new = (
+ gradient_candidate + (cosine - 1.0) * hessian_reduced + sine * hess_s
+ )
+ x_candidate_new = np.copy(x_candidate)
+ x_candidate_new[x_bounded == 0] = (
cosine * x_candidate[x_bounded == 0] + sine * search_direction[x_bounded == 0]
)
- x_grad = x_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0]
+ x_grad = x_candidate_new[x_bounded == 0] @ gradient_candidate_new[x_bounded == 0]
gradient_reduced = (
- gradient_candidate[x_bounded == 0] @ gradient_candidate[x_bounded == 0]
+ gradient_candidate_new[x_bounded == 0] @ gradient_candidate_new[x_bounded == 0]
)
hessian_reduced = cosine * hessian_reduced + sine * hess_s

- return x_candidate, gradient_candidate, x_grad, gradient_reduced, hessian_reduced
+ return (
+ x_candidate_new,
+ gradient_candidate_new,
+ x_grad,
+ gradient_reduced,
+ hessian_reduced,
+ )


def _compute_new_search_direction_and_norm(
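Reviewer note (not part of the diff): the key behavioral change in the _trsbox_quadratic.py hunk above is that _update_candidate_vectors_and_reduction_alt_step no longer mutates x_candidate and gradient_candidate in place; it builds *_new copies and returns them. Below is a minimal standalone sketch of that copy-then-update pattern. The arrays, mask, and helper name are hypothetical and not estimagic code.

import numpy as np


def update_free_entries(x, step, is_free):
    """Return a new vector whose free (unbounded) entries are shifted by step."""
    x_new = np.copy(x)  # leave the caller's array untouched
    x_new[is_free] = x[is_free] + step[is_free]
    return x_new


x = np.array([1.0, 2.0, 3.0])
step = np.array([0.5, 0.5, 0.5])
is_free = np.array([True, False, True])  # entries not fixed at a bound

x_updated = update_free_entries(x, step, is_free)
assert np.allclose(x, [1.0, 2.0, 3.0])  # the original array is unchanged
assert np.allclose(x_updated, [1.5, 2.0, 3.5])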