diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 99d9030ad2..612b5a98cc 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -31,6 +31,8 @@ set(MUJOCO_ENGINE_SRCS engine_crossplatform.h engine_derivative.c engine_derivative.h + engine_derivative_fd.c + engine_derivative_fd.h engine_file.c engine_file.h engine_forward.c diff --git a/src/engine/engine_derivative.c b/src/engine/engine_derivative.c index eaf8b1e195..3d6a848b4d 100644 --- a/src/engine/engine_derivative.c +++ b/src/engine/engine_derivative.c @@ -14,16 +14,11 @@ #include "engine/engine_derivative.h" -#include -#include - #include #include -#include "engine/engine_forward.h" #include "engine/engine_core_constraint.h" #include "engine/engine_crossplatform.h" #include "engine/engine_io.h" -#include "engine/engine_inverse.h" #include "engine/engine_macro.h" #include "engine/engine_passive.h" #include "engine/engine_support.h" @@ -224,155 +219,8 @@ static void mjd_mulInertVec_vel(mjtNum D[36], const mjtNum i[10]) -//--------------------------- utility functions for mjd_stepFD ------------------------------------- - -// get state=[qpos; qvel; act] and optionally sensordata -static void getState(const mjModel* m, const mjData* d, mjtNum* state, mjtNum* sensordata) { - int nq = m->nq, nv = m->nv, na = m->na; - - mju_copy(state, d->qpos, nq); - mju_copy(state+nq, d->qvel, nv); - mju_copy(state+nq+nv, d->act, na); - if (sensordata) { - mju_copy(sensordata, d->sensordata, m->nsensordata); - } -} - - - -// set state=[qpos; qvel; act] and optionally warmstart accelerations -static void setState(const mjModel* m, mjData* d, mjtNum time, const mjtNum* state, - const mjtNum* ctrl, const mjtNum* warmstart) { - int nq = m->nq, nv = m->nv, na = m->na; - - d->time = time; - mju_copy(d->qpos, state, nq); - mju_copy(d->qvel, state+nq, nv); - mju_copy(d->act, state+nq+nv, na); - if (ctrl) { - mju_copy(d->ctrl, ctrl, m->nu); - } - if (warmstart) { - mju_copy(d->qacc_warmstart, warmstart, nv); - } -} - - - -// dx = (x2 - x1) / h -static void diff(mjtNum* restrict dx, const mjtNum* x1, const mjtNum* x2, mjtNum h, int n) { - mjtNum inv_h = 1/h; - for (int i=0; inq, nv = m->nv, na = m->na; - - if (nq == nv) { - diff(ds, s1, s2, h, nq+nv+na); - } else { - mj_differentiatePos(m, ds, h, s1, s2); - diff(ds+nv, s1+nq, s2+nq, h, nv+na); - } -} - - - -// finite-difference two vectors, forward, backward or centered -static void clampedDiff(mjtNum* dx, const mjtNum* x, const mjtNum* x_plus, const mjtNum* x_minus, - mjtNum h, int nx) { - if (x_plus && !x_minus) { - // forward differencing - diff(dx, x, x_plus, h, nx); - } else if (!x_plus && x_minus) { - // backward differencing - diff(dx, x_minus, x, h, nx); - } else if (x_plus && x_minus) { - // centered differencing - diff(dx, x_plus, x_minus, 2*h, nx); - } else { - // differencing failed, write zeros - mju_zero(dx, nx); - } -} - - - -// finite-difference two state vectors, forward, backward or centered -static void clampedStateDiff(const mjModel* m, mjtNum* ds, const mjtNum* s, const mjtNum* s_plus, - const mjtNum* s_minus, mjtNum h) { - if (s_plus && !s_minus) { - // forward differencing - stateDiff(m, ds, s, s_plus, h); - } else if (!s_plus && s_minus) { - // backward differencing - stateDiff(m, ds, s_minus, s, h); - } else if (s_plus && s_minus) { - // centered differencing - stateDiff(m, ds, s_minus, s_plus, 2*h); - } else { - // differencing failed, write zeros - mju_zero(ds, m->nq + m->nv + m->na); - } -} - - - -// check if two numbers are inside a given range -static int inRange(const mjtNum x1, const mjtNum x2, const mjtNum* range) { - return x1 >= range[0] && x1 <= range[1] && - x2 >= range[0] && x2 <= range[1]; -} - - - -// advance simulation using control callback, skipstage is mjtStage -void mj_stepSkip(const mjModel* m, mjData* d, int skipstage, int skipsensor) { - TM_START; - - // common to all integrators - mj_checkPos(m, d); - mj_checkVel(m, d); - mj_forwardSkip(m, d, skipstage, skipsensor); - mj_checkAcc(m, d); - - // compare forward and inverse solutions if enabled - if (mjENABLED(mjENBL_FWDINV)) { - mj_compareFwdInv(m, d); - } - - // use selected integrator - switch (m->opt.integrator) { - case mjINT_EULER: - mj_EulerSkip(m, d, skipstage >= mjSTAGE_POS); - break; - - case mjINT_RK4: - // ignore skipstage - mj_RungeKutta(m, d, 4); - break; - - case mjINT_IMPLICIT: - case mjINT_IMPLICITFAST: - mj_implicitSkip(m, d, skipstage >= mjSTAGE_VEL); - break; - - default: - mju_error("Invalid integrator"); - } - - TM_END(mjTIMER_STEP); -} - - - //------------------------- dense derivatives of component functions ------------------------------- -// no longer used, for comparison only +// no longer used, except in tests // derivative of cvel, cdof_dot w.r.t qvel (dense version) static void mjd_comVel_vel_dense(const mjModel* m, mjData* d, mjtNum* Dcvel, mjtNum* Dcdofdot) @@ -1604,121 +1452,6 @@ void mjd_passive_vel(const mjModel* m, mjData* d) { -// add forward fin-diff approximation of (d qfrc_passive / d qvel) to qDeriv -void mjd_passive_velFD(const mjModel* m, mjData* d, mjtNum eps) { - int nv = m->nv; - - mjMARKSTACK; - mjtNum* qfrc_passive = mj_stackAlloc(d, nv); - mjtNum* fd = mj_stackAlloc(d, nv); - int* cnt = (int*)mj_stackAlloc(d, nv); - - // clear row counters - memset(cnt, 0, nv*sizeof(int)); - - // save qfrc_passive, assume mj_fwdVelocity was called - mju_copy(qfrc_passive, d->qfrc_passive, nv); - - // loop over dofs - for (int i=0; iqvel[i]; - - // eval at qvel[i]+eps - d->qvel[i] = saveqvel + eps; - mj_fwdVelocity(m, d); - - // restore qvel[i] - d->qvel[i] = saveqvel; - - // finite difference result in fd - mju_sub(fd, d->qfrc_passive, qfrc_passive, nv); - mju_scl(fd, fd, 1/eps, nv); - - // copy to i-th column of qDeriv - for (int j=0; jD_rowadr[j] + cnt[j]; - if (cnt[j]D_rownnz[j] && d->D_colind[adr] == i) { - d->qDeriv[adr] = fd[j]; - cnt[j]++; - } - } - } - - // restore - mj_fwdVelocity(m, d); - - mjFREESTACK; -} - - - -//-------------------- derivatives of all smooth (unconstrained) forces ---------------------------- - -// centered finite difference approximation to mjd_smooth_vel -void mjd_smooth_velFD(const mjModel* m, mjData* d, mjtNum eps) { - int nv = m->nv; - - mjMARKSTACK; - mjtNum* plus = mj_stackAlloc(d, nv); - mjtNum* minus = mj_stackAlloc(d, nv); - mjtNum* fd = mj_stackAlloc(d, nv); - int* cnt = (int*) mj_stackAlloc(d, nv); - - // clear row counters - memset(cnt, 0, nv*sizeof(int)); - - // loop over dofs - for (int i=0; iqvel[i]; - - // eval at qvel[i]+eps - d->qvel[i] = saveqvel + eps; - mj_fwdVelocity(m, d); - mj_fwdActuation(m, d); - mju_add(plus, d->qfrc_actuator, d->qfrc_passive, nv); - mju_subFrom(plus, d->qfrc_bias, nv); - - // eval at qvel[i]-eps - d->qvel[i] = saveqvel - eps; - mj_fwdVelocity(m, d); - mj_fwdActuation(m, d); - mju_add(minus, d->qfrc_actuator, d->qfrc_passive, nv); - mju_subFrom(minus, d->qfrc_bias, nv); - - // restore qvel[i] - d->qvel[i] = saveqvel; - - // finite difference result in fd - mju_sub(fd, plus, minus, nv); - mju_scl(fd, fd, 0.5/eps, nv); - - // copy to sparse qDeriv - for (int j=0; jD_rownnz[j] && d->D_colind[d->D_rowadr[j]+cnt[j]]==i) { - d->qDeriv[d->D_rowadr[j]+cnt[j]] = fd[j]; - cnt[j]++; - } - } - } - - // make sure final row counters equal rownnz - for (int i=0; iD_rownnz[i]) { - mju_error("error in constructing FD sparse derivative"); - } - } - - // restore - mj_fwdVelocity(m, d); - mj_fwdActuation(m, d); - - mjFREESTACK; -} - - - //------------------------- main entry points ------------------------------------------------------ // analytical derivative of smooth forces w.r.t velocities: @@ -1738,307 +1471,3 @@ void mjd_smooth_vel(const mjModel* m, mjData* d, int flg_bias) { mjd_rne_vel(m, d); } } - - -// finite differenced Jacobian of (next_state, sensors) = mj_step(state, control) -// all outputs are optional -// output dimensions (transposed w.r.t Control Theory convention): -// DyDq: (nv x 2*nv+na) -// DyDv: (nv x 2*nv+na) -// DyDa: (na x 2*nv+na) -// DyDu: (nu x 2*nv+na) -// DsDq: (nv x nsensordata) -// DsDv: (nv x nsensordata) -// DsDa: (na x nsensordata) -// DsDu: (nu x nsensordata) -// single-letter shortcuts: -// inputs: q=qpos, v=qvel, a=act, u=ctrl -// outputs: y=next_state (concatenated next qpos, qvel, act), s=sensordata -void mjd_stepFD(const mjModel* m, mjData* d, mjtNum eps, mjtByte centered, - mjtNum* DyDq, mjtNum* DyDv, mjtNum* DyDa, mjtNum* DyDu, - mjtNum* DsDq, mjtNum* DsDv, mjtNum* DsDa, mjtNum* DsDu) { - int nq = m->nq, nv = m->nv, na = m->na, nu = m->nu, ns = m->nsensordata; - int ndx = 2*nv+na; // row length of Dy Jacobians - mjMARKSTACK; - - // states - mjtNum *state = mj_stackAlloc(d, nq+nv+na); // current state - mjtNum *next = mj_stackAlloc(d, nq+nv+na); // next state - mjtNum *next_plus = mj_stackAlloc(d, nq+nv+na); // forward-nudged next state - mjtNum *next_minus = mj_stackAlloc(d, nq+nv+na); // backward-nudged next state - - // warmstart accelerations - mjtNum *warmstart = mjDISABLED(mjDSBL_WARMSTART) ? NULL : mj_stackAlloc(d, nv); - - // sensors - int skipsensor = !DsDq && !DsDv && !DsDa && !DsDu; - mjtNum *sensor = skipsensor ? NULL : mj_stackAlloc(d, ns); // sensor values - mjtNum *sensor_plus = skipsensor ? NULL : mj_stackAlloc(d, ns); // forward-nudged sensors - mjtNum *sensor_minus = skipsensor ? NULL : mj_stackAlloc(d, ns); // backward-nudged sensors - - // controls - mjtNum *ctrl = mj_stackAlloc(d, nu); - - // save current inputs - mjtNum time = d->time; - mju_copy(ctrl, d->ctrl, nu); - getState(m, d, state, NULL); - if (warmstart) { - mju_copy(warmstart, d->qacc_warmstart, nv); - } - - // step input - mj_stepSkip(m, d, mjSTAGE_NONE, skipsensor); - - // save output - getState(m, d, next, sensor); - - // restore input - setState(m, d, time, state, ctrl, warmstart); - - // finite-difference controls: skip=mjSTAGE_VEL, handle ctrl at range limits - if (DyDu || DsDu) { - for (int i=0; iactuator_ctrllimited[i]; - // nudge forward, if possible given ctrlrange - int nudge_fwd = !limited || inRange(ctrl[i], ctrl[i]+eps, m->actuator_ctrlrange+2*i); - if (nudge_fwd) { - // nudge forward - d->ctrl[i] += eps; - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); - getState(m, d, next_plus, sensor_plus); - - // reset - setState(m, d, time, state, ctrl, warmstart); - } - - // nudge backward, if possible given ctrlrange - int nudge_back = (centered || !nudge_fwd) && - (!limited || inRange(ctrl[i]-eps, ctrl[i], m->actuator_ctrlrange+2*i)); - if (nudge_back) { - // nudge backward - d->ctrl[i] -= eps; - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); - getState(m, d, next_minus, sensor_minus); - - // reset - setState(m, d, time, state, ctrl, warmstart); - } - - // difference states - if (DyDu) { - clampedStateDiff(m, DyDu+i*ndx, next, nudge_fwd ? next_plus : NULL, - nudge_back ? next_minus : NULL, eps); - } - - // difference sensors - if (DsDu) { - clampedDiff(DsDu+i*ns, sensor, nudge_fwd ? sensor_plus : NULL, - nudge_back ? sensor_minus : NULL, eps, ns); - } - } - } - - // finite-difference activations: skip=mjSTAGE_VEL - if (DyDa || DsDa) { - for (int i=0; iact[i] += eps; - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); - getState(m, d, next_plus, sensor_plus); - - // reset - setState(m, d, time, state, NULL, warmstart); - - // nudge backward - if (centered) { - // nudge backward - d->act[i] -= eps; - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); - getState(m, d, next_minus, sensor_minus); - - // reset - setState(m, d, time, state, NULL, warmstart); - } - - // difference states - if (DyDa) { - if (!centered) { - stateDiff(m, DyDa+i*ndx, next, next_plus, eps); - } else { - stateDiff(m, DyDa+i*ndx, next_minus, next_plus, 2*eps); - } - } - - // difference sensors - if (DsDa) { - if (!centered) { - diff(DsDa+i*ns, sensor, sensor_plus, eps, ns); - } else { - diff(DsDa+i*ns, sensor_minus, sensor_plus, 2*eps, ns); - } - } - } - } - - - // finite-difference velocities: skip=mjSTAGE_POS - if (DyDv || DsDv) { - for (int i=0; iqvel[i] += eps; - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_POS, skipsensor); - getState(m, d, next_plus, sensor_plus); - - // reset - setState(m, d, time, state, NULL, warmstart); - - // nudge backward - if (centered) { - // nudge - d->qvel[i] -= eps; - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_POS, skipsensor); - getState(m, d, next_minus, sensor_minus); - - // reset - setState(m, d, time, state, NULL, warmstart); - } - - // difference states - if (DyDv) { - if (!centered) { - stateDiff(m, DyDv+i*ndx, next, next_plus, eps); - } else { - stateDiff(m, DyDv+i*ndx, next_minus, next_plus, 2*eps); - } - } - - // difference sensors - if (DsDv) { - if (!centered) { - diff(DsDv+i*ns, sensor, sensor_plus, eps, ns); - } else { - diff(DsDv+i*ns, sensor_minus, sensor_plus, 2*eps, ns); - } - } - } - } - - // finite-difference positions: skip=mjSTAGE_NONE - if (DyDq || DsDq) { - mjtNum *dpos = mj_stackAlloc(d, nv); // allocate position perturbation - for (int i=0; iqpos, dpos, eps); - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_NONE, skipsensor); - getState(m, d, next_plus, sensor_plus); - - // reset - setState(m, d, time, state, NULL, warmstart); - - // nudge backward - if (centered) { - // nudge backward - mju_zero(dpos, nv); - dpos[i] = 1; - mj_integratePos(m, d->qpos, dpos, -eps); - - // step, get nudged output - mj_stepSkip(m, d, mjSTAGE_NONE, skipsensor); - getState(m, d, next_minus, sensor_minus); - - // reset - setState(m, d, time, state, NULL, warmstart); - } - - // difference states - if (DyDq) { - if (!centered) { - stateDiff(m, DyDq+i*ndx, next, next_plus, eps); - } else { - stateDiff(m, DyDq+i*ndx, next_minus, next_plus, 2*eps); - } - } - - // difference sensors - if (DsDq) { - if (!centered) { - diff(DsDq+i*ns, sensor, sensor_plus, eps, ns); - } else { - diff(DsDq+i*ns, sensor_minus, sensor_plus, 2*eps, ns); - } - } - } - } - - mjFREESTACK; -} - - - -// finite differenced transition matrices (control theory notation) -// d(x_next) = A*dx + B*du -// d(sensor) = C*dx + D*du -// required output matrix dimensions: -// A: (2*nv+na x 2*nv+na) -// B: (2*nv+na x nu) -// D: (nsensordata x 2*nv+na) -// C: (nsensordata x nu) -void mjd_transitionFD(const mjModel* m, mjData* d, mjtNum eps, mjtByte centered, - mjtNum* A, mjtNum* B, mjtNum* C, mjtNum* D) { - int nv = m->nv, na = m->na, nu = m->nu, ns = m->nsensordata; - int ndx = 2*nv+na; // row length of state Jacobians - - // stepFD() offset pointers, initialised to NULL - mjtNum *DyDq, *DyDv, *DyDa, *DsDq, *DsDv, *DsDa; - DyDq = DyDv = DyDa = DsDq = DsDv = DsDa = NULL; - - mjMARKSTACK; - - // allocate transposed matrices - mjtNum *AT = A ? mj_stackAlloc(d, ndx*ndx) : NULL; // state-transition matrix (transposed) - mjtNum *BT = B ? mj_stackAlloc(d, nu*ndx) : NULL; // control-transition matrix (transposed) - mjtNum *CT = C ? mj_stackAlloc(d, ndx*ns) : NULL; // state-observation matrix (transposed) - mjtNum *DT = D ? mj_stackAlloc(d, nu*ns) : NULL; // control-observation matrix (transposed) - - // set offset pointers - if (A) { - DyDq = AT; - DyDv = AT+ndx*nv; - DyDa = AT+ndx*2*nv; - } - - if (C) { - DsDq = CT; - DsDv = CT + ns*nv; - DsDa = CT + ns*2*nv; - } - - // get Jacobians - mjd_stepFD(m, d, eps, centered, DyDq, DyDv, DyDa, BT, DsDq, DsDv, DsDa, DT); - - - // transpose - if (A) mju_transpose(A, AT, ndx, ndx); - if (B) mju_transpose(B, BT, nu, ndx); - if (C) mju_transpose(C, CT, ndx, ns); - if (D) mju_transpose(D, DT, nu, ns); - - mjFREESTACK; -} diff --git a/src/engine/engine_derivative.h b/src/engine/engine_derivative.h index ab093415cc..86f7cd6607 100644 --- a/src/engine/engine_derivative.h +++ b/src/engine/engine_derivative.h @@ -27,9 +27,6 @@ extern "C" { // d->qDeriv = d (qfrc_actuator + qfrc_passive - [qfrc_bias]) / d qvel MJAPI void mjd_smooth_vel(const mjModel* m, mjData* d, int flg_bias); -// centered finite difference approximation to mjd_smooth_vel -MJAPI void mjd_smooth_velFD(const mjModel* m, mjData* d, mjtNum eps); - // add (d qfrc_actuator / d qvel) to qDeriv MJAPI void mjd_actuator_vel(const mjModel* m, mjData* d); @@ -39,16 +36,6 @@ MJAPI void mjd_passive_vel(const mjModel* m, mjData* d); // subtract (d qfrc_bias / d qvel) from qDeriv (dense version) MJAPI void mjd_rne_vel_dense(const mjModel* m, mjData* d); -// add forward finite difference approximation of (d qfrc_passive / d qvel) to qDeriv -MJAPI void mjd_passive_velFD(const mjModel* m, mjData* d, mjtNum eps); - -// advance simulation using control callback, skipstage is mjtStage -MJAPI void mj_stepSkip(const mjModel* m, mjData* d, int skipstage, int skipsensor); - -// finite differenced transition matrices (control theory notation) -MJAPI void mjd_transitionFD(const mjModel* m, mjData* d, mjtNum eps, mjtByte centered, - mjtNum* A, mjtNum* B, mjtNum* C, mjtNum* D); - #ifdef __cplusplus } #endif diff --git a/src/engine/engine_derivative_fd.c b/src/engine/engine_derivative_fd.c new file mode 100644 index 0000000000..b08863a6d9 --- /dev/null +++ b/src/engine/engine_derivative_fd.c @@ -0,0 +1,600 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "engine/engine_derivative_fd.h" + +#include +#include + +#include +#include +#include "engine/engine_forward.h" +#include "engine/engine_io.h" +#include "engine/engine_inverse.h" +#include "engine/engine_macro.h" +#include "engine/engine_support.h" +#include "engine/engine_util_blas.h" +#include "engine/engine_util_errmem.h" + + + +//--------------------------- finite-differencing utility functions -------------------------------- + +// get state=[qpos; qvel; act] and optionally sensordata +static void getState(const mjModel* m, const mjData* d, mjtNum* state, mjtNum* sensordata) { + int nq = m->nq, nv = m->nv, na = m->na; + + mju_copy(state, d->qpos, nq); + mju_copy(state+nq, d->qvel, nv); + mju_copy(state+nq+nv, d->act, na); + if (sensordata) { + mju_copy(sensordata, d->sensordata, m->nsensordata); + } +} + + + +// set state=[qpos; qvel; act] and optionally warmstart accelerations +static void setState(const mjModel* m, mjData* d, mjtNum time, const mjtNum* state, + const mjtNum* ctrl, const mjtNum* warmstart) { + int nq = m->nq, nv = m->nv, na = m->na; + + d->time = time; + mju_copy(d->qpos, state, nq); + mju_copy(d->qvel, state+nq, nv); + mju_copy(d->act, state+nq+nv, na); + if (ctrl) { + mju_copy(d->ctrl, ctrl, m->nu); + } + if (warmstart) { + mju_copy(d->qacc_warmstart, warmstart, nv); + } +} + + + +// dx = (x2 - x1) / h +static void diff(mjtNum* restrict dx, const mjtNum* x1, const mjtNum* x2, mjtNum h, int n) { + mjtNum inv_h = 1/h; + for (int i=0; inq, nv = m->nv, na = m->na; + + if (nq == nv) { + diff(ds, s1, s2, h, nq+nv+na); + } else { + mj_differentiatePos(m, ds, h, s1, s2); + diff(ds+nv, s1+nq, s2+nq, h, nv+na); + } +} + + + +// finite-difference two vectors, forward, backward or centered +static void clampedDiff(mjtNum* dx, const mjtNum* x, const mjtNum* x_plus, const mjtNum* x_minus, + mjtNum h, int nx) { + if (x_plus && !x_minus) { + // forward differencing + diff(dx, x, x_plus, h, nx); + } else if (!x_plus && x_minus) { + // backward differencing + diff(dx, x_minus, x, h, nx); + } else if (x_plus && x_minus) { + // centered differencing + diff(dx, x_plus, x_minus, 2*h, nx); + } else { + // differencing failed, write zeros + mju_zero(dx, nx); + } +} + + + +// finite-difference two state vectors, forward, backward or centered +static void clampedStateDiff(const mjModel* m, mjtNum* ds, const mjtNum* s, const mjtNum* s_plus, + const mjtNum* s_minus, mjtNum h) { + if (s_plus && !s_minus) { + // forward differencing + stateDiff(m, ds, s, s_plus, h); + } else if (!s_plus && s_minus) { + // backward differencing + stateDiff(m, ds, s_minus, s, h); + } else if (s_plus && s_minus) { + // centered differencing + stateDiff(m, ds, s_minus, s_plus, 2*h); + } else { + // differencing failed, write zeros + mju_zero(ds, m->nq + m->nv + m->na); + } +} + + + +// check if two numbers are inside a given range +static int inRange(const mjtNum x1, const mjtNum x2, const mjtNum* range) { + return x1 >= range[0] && x1 <= range[1] && + x2 >= range[0] && x2 <= range[1]; +} + + + +// advance simulation using control callback, skipstage is mjtStage +void mj_stepSkip(const mjModel* m, mjData* d, int skipstage, int skipsensor) { + TM_START; + + // common to all integrators + mj_checkPos(m, d); + mj_checkVel(m, d); + mj_forwardSkip(m, d, skipstage, skipsensor); + mj_checkAcc(m, d); + + // compare forward and inverse solutions if enabled + if (mjENABLED(mjENBL_FWDINV)) { + mj_compareFwdInv(m, d); + } + + // use selected integrator + switch (m->opt.integrator) { + case mjINT_EULER: + mj_EulerSkip(m, d, skipstage >= mjSTAGE_POS); + break; + + case mjINT_RK4: + // ignore skipstage + mj_RungeKutta(m, d, 4); + break; + + case mjINT_IMPLICIT: + case mjINT_IMPLICITFAST: + mj_implicitSkip(m, d, skipstage >= mjSTAGE_VEL); + break; + + default: + mju_error("Invalid integrator"); + } + + TM_END(mjTIMER_STEP); +} + + + +//------------------------- derivatives of passive forces ------------------------------------------ + +// add forward fin-diff approximation of (d qfrc_passive / d qvel) to qDeriv +void mjd_passive_velFD(const mjModel* m, mjData* d, mjtNum eps) { + int nv = m->nv; + + mjMARKSTACK; + mjtNum* qfrc_passive = mj_stackAlloc(d, nv); + mjtNum* fd = mj_stackAlloc(d, nv); + int* cnt = (int*)mj_stackAlloc(d, nv); + + // clear row counters + memset(cnt, 0, nv*sizeof(int)); + + // save qfrc_passive, assume mj_fwdVelocity was called + mju_copy(qfrc_passive, d->qfrc_passive, nv); + + // loop over dofs + for (int i=0; iqvel[i]; + + // eval at qvel[i]+eps + d->qvel[i] = saveqvel + eps; + mj_fwdVelocity(m, d); + + // restore qvel[i] + d->qvel[i] = saveqvel; + + // finite difference result in fd + mju_sub(fd, d->qfrc_passive, qfrc_passive, nv); + mju_scl(fd, fd, 1/eps, nv); + + // copy to i-th column of qDeriv + for (int j=0; jD_rowadr[j] + cnt[j]; + if (cnt[j]D_rownnz[j] && d->D_colind[adr] == i) { + d->qDeriv[adr] = fd[j]; + cnt[j]++; + } + } + } + + // restore + mj_fwdVelocity(m, d); + + mjFREESTACK; +} + + + +//-------------------- derivatives of all smooth (unconstrained) forces ---------------------------- + +// centered finite difference approximation to mjd_smooth_vel +void mjd_smooth_velFD(const mjModel* m, mjData* d, mjtNum eps) { + int nv = m->nv; + + mjMARKSTACK; + mjtNum* plus = mj_stackAlloc(d, nv); + mjtNum* minus = mj_stackAlloc(d, nv); + mjtNum* fd = mj_stackAlloc(d, nv); + int* cnt = (int*) mj_stackAlloc(d, nv); + + // clear row counters + memset(cnt, 0, nv*sizeof(int)); + + // loop over dofs + for (int i=0; iqvel[i]; + + // eval at qvel[i]+eps + d->qvel[i] = saveqvel + eps; + mj_fwdVelocity(m, d); + mj_fwdActuation(m, d); + mju_add(plus, d->qfrc_actuator, d->qfrc_passive, nv); + mju_subFrom(plus, d->qfrc_bias, nv); + + // eval at qvel[i]-eps + d->qvel[i] = saveqvel - eps; + mj_fwdVelocity(m, d); + mj_fwdActuation(m, d); + mju_add(minus, d->qfrc_actuator, d->qfrc_passive, nv); + mju_subFrom(minus, d->qfrc_bias, nv); + + // restore qvel[i] + d->qvel[i] = saveqvel; + + // finite difference result in fd + mju_sub(fd, plus, minus, nv); + mju_scl(fd, fd, 0.5/eps, nv); + + // copy to sparse qDeriv + for (int j=0; jD_rownnz[j] && d->D_colind[d->D_rowadr[j]+cnt[j]]==i) { + d->qDeriv[d->D_rowadr[j]+cnt[j]] = fd[j]; + cnt[j]++; + } + } + } + + // make sure final row counters equal rownnz + for (int i=0; iD_rownnz[i]) { + mju_error("error in constructing FD sparse derivative"); + } + } + + // restore + mj_fwdVelocity(m, d); + mj_fwdActuation(m, d); + + mjFREESTACK; +} + + + +//------------------------- main entry points ------------------------------------------------------ + + +// finite differenced Jacobian of (next_state, sensors) = mj_step(state, control) +// all outputs are optional +// output dimensions (transposed w.r.t Control Theory convention): +// DyDq: (nv x 2*nv+na) +// DyDv: (nv x 2*nv+na) +// DyDa: (na x 2*nv+na) +// DyDu: (nu x 2*nv+na) +// DsDq: (nv x nsensordata) +// DsDv: (nv x nsensordata) +// DsDa: (na x nsensordata) +// DsDu: (nu x nsensordata) +// single-letter shortcuts: +// inputs: q=qpos, v=qvel, a=act, u=ctrl +// outputs: y=next_state (concatenated next qpos, qvel, act), s=sensordata +void mjd_stepFD(const mjModel* m, mjData* d, mjtNum eps, mjtByte centered, + mjtNum* DyDq, mjtNum* DyDv, mjtNum* DyDa, mjtNum* DyDu, + mjtNum* DsDq, mjtNum* DsDv, mjtNum* DsDa, mjtNum* DsDu) { + int nq = m->nq, nv = m->nv, na = m->na, nu = m->nu, ns = m->nsensordata; + int ndx = 2*nv+na; // row length of Dy Jacobians + mjMARKSTACK; + + // states + mjtNum *state = mj_stackAlloc(d, nq+nv+na); // current state + mjtNum *next = mj_stackAlloc(d, nq+nv+na); // next state + mjtNum *next_plus = mj_stackAlloc(d, nq+nv+na); // forward-nudged next state + mjtNum *next_minus = mj_stackAlloc(d, nq+nv+na); // backward-nudged next state + + // warmstart accelerations + mjtNum *warmstart = mjDISABLED(mjDSBL_WARMSTART) ? NULL : mj_stackAlloc(d, nv); + + // sensors + int skipsensor = !DsDq && !DsDv && !DsDa && !DsDu; + mjtNum *sensor = skipsensor ? NULL : mj_stackAlloc(d, ns); // sensor values + mjtNum *sensor_plus = skipsensor ? NULL : mj_stackAlloc(d, ns); // forward-nudged sensors + mjtNum *sensor_minus = skipsensor ? NULL : mj_stackAlloc(d, ns); // backward-nudged sensors + + // controls + mjtNum *ctrl = mj_stackAlloc(d, nu); + + // save current inputs + mjtNum time = d->time; + mju_copy(ctrl, d->ctrl, nu); + getState(m, d, state, NULL); + if (warmstart) { + mju_copy(warmstart, d->qacc_warmstart, nv); + } + + // step input + mj_stepSkip(m, d, mjSTAGE_NONE, skipsensor); + + // save output + getState(m, d, next, sensor); + + // restore input + setState(m, d, time, state, ctrl, warmstart); + + // finite-difference controls: skip=mjSTAGE_VEL, handle ctrl at range limits + if (DyDu || DsDu) { + for (int i=0; iactuator_ctrllimited[i]; + // nudge forward, if possible given ctrlrange + int nudge_fwd = !limited || inRange(ctrl[i], ctrl[i]+eps, m->actuator_ctrlrange+2*i); + if (nudge_fwd) { + // nudge forward + d->ctrl[i] += eps; + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); + getState(m, d, next_plus, sensor_plus); + + // reset + setState(m, d, time, state, ctrl, warmstart); + } + + // nudge backward, if possible given ctrlrange + int nudge_back = (centered || !nudge_fwd) && + (!limited || inRange(ctrl[i]-eps, ctrl[i], m->actuator_ctrlrange+2*i)); + if (nudge_back) { + // nudge backward + d->ctrl[i] -= eps; + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); + getState(m, d, next_minus, sensor_minus); + + // reset + setState(m, d, time, state, ctrl, warmstart); + } + + // difference states + if (DyDu) { + clampedStateDiff(m, DyDu+i*ndx, next, nudge_fwd ? next_plus : NULL, + nudge_back ? next_minus : NULL, eps); + } + + // difference sensors + if (DsDu) { + clampedDiff(DsDu+i*ns, sensor, nudge_fwd ? sensor_plus : NULL, + nudge_back ? sensor_minus : NULL, eps, ns); + } + } + } + + // finite-difference activations: skip=mjSTAGE_VEL + if (DyDa || DsDa) { + for (int i=0; iact[i] += eps; + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); + getState(m, d, next_plus, sensor_plus); + + // reset + setState(m, d, time, state, NULL, warmstart); + + // nudge backward + if (centered) { + // nudge backward + d->act[i] -= eps; + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_VEL, skipsensor); + getState(m, d, next_minus, sensor_minus); + + // reset + setState(m, d, time, state, NULL, warmstart); + } + + // difference states + if (DyDa) { + if (!centered) { + stateDiff(m, DyDa+i*ndx, next, next_plus, eps); + } else { + stateDiff(m, DyDa+i*ndx, next_minus, next_plus, 2*eps); + } + } + + // difference sensors + if (DsDa) { + if (!centered) { + diff(DsDa+i*ns, sensor, sensor_plus, eps, ns); + } else { + diff(DsDa+i*ns, sensor_minus, sensor_plus, 2*eps, ns); + } + } + } + } + + + // finite-difference velocities: skip=mjSTAGE_POS + if (DyDv || DsDv) { + for (int i=0; iqvel[i] += eps; + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_POS, skipsensor); + getState(m, d, next_plus, sensor_plus); + + // reset + setState(m, d, time, state, NULL, warmstart); + + // nudge backward + if (centered) { + // nudge + d->qvel[i] -= eps; + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_POS, skipsensor); + getState(m, d, next_minus, sensor_minus); + + // reset + setState(m, d, time, state, NULL, warmstart); + } + + // difference states + if (DyDv) { + if (!centered) { + stateDiff(m, DyDv+i*ndx, next, next_plus, eps); + } else { + stateDiff(m, DyDv+i*ndx, next_minus, next_plus, 2*eps); + } + } + + // difference sensors + if (DsDv) { + if (!centered) { + diff(DsDv+i*ns, sensor, sensor_plus, eps, ns); + } else { + diff(DsDv+i*ns, sensor_minus, sensor_plus, 2*eps, ns); + } + } + } + } + + // finite-difference positions: skip=mjSTAGE_NONE + if (DyDq || DsDq) { + mjtNum *dpos = mj_stackAlloc(d, nv); // allocate position perturbation + for (int i=0; iqpos, dpos, eps); + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_NONE, skipsensor); + getState(m, d, next_plus, sensor_plus); + + // reset + setState(m, d, time, state, NULL, warmstart); + + // nudge backward + if (centered) { + // nudge backward + mju_zero(dpos, nv); + dpos[i] = 1; + mj_integratePos(m, d->qpos, dpos, -eps); + + // step, get nudged output + mj_stepSkip(m, d, mjSTAGE_NONE, skipsensor); + getState(m, d, next_minus, sensor_minus); + + // reset + setState(m, d, time, state, NULL, warmstart); + } + + // difference states + if (DyDq) { + if (!centered) { + stateDiff(m, DyDq+i*ndx, next, next_plus, eps); + } else { + stateDiff(m, DyDq+i*ndx, next_minus, next_plus, 2*eps); + } + } + + // difference sensors + if (DsDq) { + if (!centered) { + diff(DsDq+i*ns, sensor, sensor_plus, eps, ns); + } else { + diff(DsDq+i*ns, sensor_minus, sensor_plus, 2*eps, ns); + } + } + } + } + + mjFREESTACK; +} + + + +// finite differenced transition matrices (control theory notation) +// d(x_next) = A*dx + B*du +// d(sensor) = C*dx + D*du +// required output matrix dimensions: +// A: (2*nv+na x 2*nv+na) +// B: (2*nv+na x nu) +// D: (nsensordata x 2*nv+na) +// C: (nsensordata x nu) +void mjd_transitionFD(const mjModel* m, mjData* d, mjtNum eps, mjtByte centered, + mjtNum* A, mjtNum* B, mjtNum* C, mjtNum* D) { + int nv = m->nv, na = m->na, nu = m->nu, ns = m->nsensordata; + int ndx = 2*nv+na; // row length of state Jacobians + + // stepFD() offset pointers, initialised to NULL + mjtNum *DyDq, *DyDv, *DyDa, *DsDq, *DsDv, *DsDa; + DyDq = DyDv = DyDa = DsDq = DsDv = DsDa = NULL; + + mjMARKSTACK; + + // allocate transposed matrices + mjtNum *AT = A ? mj_stackAlloc(d, ndx*ndx) : NULL; // state-transition matrix (transposed) + mjtNum *BT = B ? mj_stackAlloc(d, nu*ndx) : NULL; // control-transition matrix (transposed) + mjtNum *CT = C ? mj_stackAlloc(d, ndx*ns) : NULL; // state-observation matrix (transposed) + mjtNum *DT = D ? mj_stackAlloc(d, nu*ns) : NULL; // control-observation matrix (transposed) + + // set offset pointers + if (A) { + DyDq = AT; + DyDv = AT+ndx*nv; + DyDa = AT+ndx*2*nv; + } + + if (C) { + DsDq = CT; + DsDv = CT + ns*nv; + DsDa = CT + ns*2*nv; + } + + // get Jacobians + mjd_stepFD(m, d, eps, centered, DyDq, DyDv, DyDa, BT, DsDq, DsDv, DsDa, DT); + + + // transpose + if (A) mju_transpose(A, AT, ndx, ndx); + if (B) mju_transpose(B, BT, nu, ndx); + if (C) mju_transpose(C, CT, ndx, ns); + if (D) mju_transpose(D, DT, nu, ns); + + mjFREESTACK; +} diff --git a/src/engine/engine_derivative_fd.h b/src/engine/engine_derivative_fd.h new file mode 100644 index 0000000000..d19667e0ca --- /dev/null +++ b/src/engine/engine_derivative_fd.h @@ -0,0 +1,43 @@ +// Copyright 2022 DeepMind Technologies Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MUJOCO_SRC_ENGINE_ENGINE_DERIVATIVE_FD_H_ +#define MUJOCO_SRC_ENGINE_ENGINE_DERIVATIVE_FD_H_ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// centered finite difference approximation to mjd_smooth_vel +MJAPI void mjd_smooth_velFD(const mjModel* m, mjData* d, mjtNum eps); + +// add forward finite difference approximation of (d qfrc_passive / d qvel) to qDeriv +MJAPI void mjd_passive_velFD(const mjModel* m, mjData* d, mjtNum eps); + +// advance simulation using control callback, skipstage is mjtStage +MJAPI void mj_stepSkip(const mjModel* m, mjData* d, int skipstage, int skipsensor); + +// finite differenced transition matrices (control theory notation) +MJAPI void mjd_transitionFD(const mjModel* m, mjData* d, mjtNum eps, mjtByte centered, + mjtNum* A, mjtNum* B, mjtNum* C, mjtNum* D); + +#ifdef __cplusplus +} +#endif + +#endif // MUJOCO_SRC_ENGINE_ENGINE_DERIVATIVE_FD_H_ diff --git a/test/engine/engine_derivative_test.cc b/test/engine/engine_derivative_test.cc index 826866d9b8..1d251e15a2 100644 --- a/test/engine/engine_derivative_test.cc +++ b/test/engine/engine_derivative_test.cc @@ -14,7 +14,6 @@ // Tests for engine/engine_derivative.c. -#include #include #include @@ -23,6 +22,7 @@ #include #include "src/engine/engine_core_smooth.h" #include "src/engine/engine_derivative.h" +#include "src/engine/engine_derivative_fd.h" #include "src/engine/engine_io.h" #include "src/engine/engine_util_blas.h" #include "src/engine/engine_util_errmem.h" @@ -78,7 +78,7 @@ static void PrintMatrix(mjtNum* mat, int nrow, int ncol) { } std::cerr << "\n"; } -} +} // NOLINT(clang-diagnostic-unused-function) std::vector AsVector(const mjtNum* array, int n) {