Skip to content

Commit

Permalink
Add CSR implementation of mj_factorI
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 712498431
Change-Id: I13b52e53482ed97da8788875d4d95e2beb5ca7c1
  • Loading branch information
yuvaltassa authored and copybara-github committed Jan 6, 2025
1 parent 7eb8231 commit ac11e5f
Show file tree
Hide file tree
Showing 9 changed files with 514 additions and 14 deletions.
46 changes: 39 additions & 7 deletions src/engine/engine_core_smooth.c
Original file line number Diff line number Diff line change
Expand Up @@ -1440,10 +1440,9 @@ void mj_factorI(const mjModel* m, mjData* d, const mjtNum* M, mjtNum* qLD, mjtNu
}
}

// compute 1/diag(D), 1/sqrt(diag(D))
// compute 1/diag(D)
for (int i=0; i < nv; i++) {
mjtNum qLDi = qLD[dof_Madr[i]];
qLDiagInv[i] = 1.0/qLDi;
qLDiagInv[i] = 1.0 / qLD[dof_Madr[i]];
}
}

Expand All @@ -1458,6 +1457,40 @@ void mj_factorM(const mjModel* m, mjData* d) {



// sparse L'*D*L factorizaton of inertia-like matrix M, assumed spd
// like mj_factorI, but using CSR representation
void mj_factorIs(mjtNum* mat, mjtNum* diaginv, int nv,
const int* rownnz, const int* rowadr, const int* diagnum, const int* colind) {
// backward loop over rows
for (int k=nv-1; k >= 0; k--) {
// get row k's address, diagonal index, inverse diagonal value
int rowadr_k = rowadr[k];
int diag_k = rowadr_k + rownnz[k] - 1;
mjtNum invD = 1 / mat[diag_k];
if (diaginv) diaginv[k] = invD;

// skip if simple
if (diagnum[k]) {
continue;
}

// update triangle above row k, inclusive
for (int adr=diag_k - 1; adr >= rowadr_k; adr--) {
// tmp = L(k, i) / L(k, k)
mjtNum tmp = mat[adr] * invD;

// update row i < k: L(i, 0..i) -= L(i, 0..i) * L(k, i) / L(k, k)
int i = colind[adr];
mju_addToScl(mat + rowadr[i], mat + rowadr_k, -tmp, rownnz[i]);

// update ith element of row k: L(k, i) /= L(k, k)
mat[adr] = tmp;
}
}
}



// in-place sparse backsubstitution: x = inv(L'*D*L)*x
// L is in lower triangle of qLD; D is on diagonal of qLD
// handle n vectors at once
Expand Down Expand Up @@ -1575,16 +1608,15 @@ void mj_solveLD(const mjModel* m, mjtNum* restrict x, int n,
// in-place sparse backsubstitution: x = inv(L'*D*L)*x
// like mj_solveLD, but using the CSR representation of L
void mj_solveLDs(mjtNum* restrict x, const mjtNum* qLDs, const mjtNum* qLDiagInv, int nv,
const int* rownnz, const int* rowadr, const int* diagind, const int* diagnum,
const int* colind) {
const int* rownnz, const int* rowadr, const int* diagnum, const int* colind) {
// x <- L^-T x
for (int i=nv-1; i > 0; i--) {
// skip diagonal (simple) rows, exploit sparsity of input vector
if (diagnum[i] || x[i] == 0) {
continue;
}

int d = diagind[i];
int d = rownnz[i] - 1;
int adr_i = rowadr[i];
mjtNum x_i = x[i];
for (int j=0; j < d; j++) {
Expand All @@ -1607,7 +1639,7 @@ void mj_solveLDs(mjtNum* restrict x, const mjtNum* qLDs, const mjtNum* qLDiagInv
}

int adr = rowadr[i];
x[i] -= mju_dotSparse(qLDs+adr, x, diagind[i], colind+adr, /*flg_unc1=*/0);
x[i] -= mju_dotSparse(qLDs+adr, x, rownnz[i] - 1, colind+adr, /*flg_unc1=*/0);
}
}

Expand Down
8 changes: 6 additions & 2 deletions src/engine/engine_core_smooth.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ MJAPI void mj_crb(const mjModel* m, mjData* d);
// sparse L'*D*L factorizaton of inertia-like matrix M, assumed spd
MJAPI void mj_factorI(const mjModel* m, mjData* d, const mjtNum* M, mjtNum* qLD, mjtNum* qLDiagInv);

// sparse L'*D*L factorizaton of inertia-like matrix
// like mj_factorI, but using CSR representation
MJAPI void mj_factorIs(mjtNum* mat, mjtNum* diaginv, int nv,
const int* rownnz, const int* rowadr, const int* diagnum, const int* colind);

// sparse L'*D*L factorizaton of the inertia matrix M, assumed spd
MJAPI void mj_factorM(const mjModel* m, mjData* d);

Expand All @@ -61,8 +66,7 @@ MJAPI void mj_solveLD(const mjModel* m, mjtNum* x, int n,
// in-place sparse backsubstitution: x = inv(L'*D*L)*x
// like mj_solveLD, but using the CSR representation of L
MJAPI void mj_solveLDs(mjtNum* x, const mjtNum* qLDs, const mjtNum* qLDiagInv, int nv,
const int* rownnz, const int* rowadr, const int* diagind, const int* diagnum,
const int* colind);
const int* rownnz, const int* rowadr, const int* diagnum, const int* colind);

// sparse backsubstitution: x = inv(L'*D*L)*y, use factorization in d
MJAPI void mj_solveM(const mjModel* m, mjData* d, mjtNum* x, const mjtNum* y, int n);
Expand Down
12 changes: 12 additions & 0 deletions test/benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ mujoco_test(
ADDITIONAL_LINK_LIBRARIES benchmark::benchmark absl::core_headers
)

mujoco_test(
factorI_benchmark_test
MAIN_TARGET benchmark::benchmark_main
ADDITIONAL_LINK_LIBRARIES benchmark::benchmark absl::core_headers
)

mujoco_test(
inertia_benchmark_test
MAIN_TARGET benchmark::benchmark_main
ADDITIONAL_LINK_LIBRARIES benchmark::benchmark absl::core_headers
)

mujoco_test(
solveLD_benchmark_test
MAIN_TARGET benchmark::benchmark_main
Expand Down
102 changes: 102 additions & 0 deletions test/benchmark/factorI_benchmark_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright 2025 DeepMind Technologies Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// A benchmark for comparing different implementations of mj_factorI.

#include <benchmark/benchmark.h>
#include <absl/base/attributes.h>
#include <mujoco/mjdata.h>
#include <mujoco/mujoco.h>
#include "src/engine/engine_core_smooth.h"
#include "test/fixture.h"

namespace mujoco {
namespace {

// number of steps to benchmark
static const int kNumBenchmarkSteps = 50;

// ----------------------------- benchmark ------------------------------------

static void BM_factorI(benchmark::State& state, bool legacy, bool coil) {
static mjModel* m;
if (coil) {
m = LoadModelFromPath("plugin/elasticity/coil.xml");
} else {
m = LoadModelFromPath("humanoid/humanoid100.xml");
}

mjData* d = mj_makeData(m);
mj_forward(m, d);

// allocate inputs and outputs
mj_markStack(d);

// CSR matrices
mjtNum* Ms = mj_stackAllocNum(d, m->nC);
mjtNum* LDs = mj_stackAllocNum(d, m->nC);
for (int i=0; i < m->nC; i++) {
Ms[i] = d->qM[d->mapM2C[i]];
}

// benchmark
while (state.KeepRunningBatch(kNumBenchmarkSteps)) {
for (int i=0; i < kNumBenchmarkSteps; i++) {
if (legacy) {
mj_factorI(m, d, d->qM, d->qLD, d->qLDiagInv);
} else {
mju_copy(LDs, Ms, m->nC);
mj_factorIs(LDs, d->qLDiagInv, m->nv,
d->C_rownnz, d->C_rowadr, m->dof_simplenum, d->C_colind);
}
}
}

// finalize
mj_freeStack(d);
mj_deleteData(d);
mj_deleteModel(m);
state.SetItemsProcessed(state.iterations());
}

void ABSL_ATTRIBUTE_NO_TAIL_CALL
BM_factorI_COIL_LEGACY(benchmark::State& state) {
MujocoErrorTestGuard guard;
BM_factorI(state, /*legacy=*/true, /*coil=*/true);
}
BENCHMARK(BM_factorI_COIL_LEGACY);

void ABSL_ATTRIBUTE_NO_TAIL_CALL
BM_factorI_COIL_CSR(benchmark::State& state) {
MujocoErrorTestGuard guard;
BM_factorI(state, /*legacy=*/false, /*coil=*/true);
}
BENCHMARK(BM_factorI_COIL_CSR);

void ABSL_ATTRIBUTE_NO_TAIL_CALL
BM_factorI_H100_LEGACY(benchmark::State& state) {
MujocoErrorTestGuard guard;
BM_factorI(state, /*legacy=*/true, /*coil=*/false);
}
BENCHMARK(BM_factorI_H100_LEGACY);

void ABSL_ATTRIBUTE_NO_TAIL_CALL
BM_factorI_H100_CSR(benchmark::State& state) {
MujocoErrorTestGuard guard;
BM_factorI(state, /*legacy=*/false, /*coil=*/false);
}
BENCHMARK(BM_factorI_H100_CSR);

} // namespace
} // namespace mujoco
101 changes: 101 additions & 0 deletions test/benchmark/inertia_benchmark_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2025 DeepMind Technologies Limited
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// A benchmark for comparing legacy and two CSR implementations of inertia
// factor and then solve.

#include <benchmark/benchmark.h>
#include <absl/base/attributes.h>
#include <mujoco/mjdata.h>
#include <mujoco/mujoco.h>
#include "src/engine/engine_core_smooth.h"
#include "test/fixture.h"

namespace mujoco {
namespace {

// number of steps to benchmark
static const int kNumBenchmarkSteps = 50;

// ----------------------------- benchmark ------------------------------------

enum class SolveType {
kLegacy = 0,
kCsr,
};

static void BM_solve(benchmark::State& state, SolveType type) {
static mjModel* m;
m = LoadModelFromPath("../test/benchmark/testdata/inertia.xml");

mjData* d = mj_makeData(m);
mj_forward(m, d);

// allocate input and output vectors
mj_markStack(d);

// make CSR matrix
mjtNum* Ms = mj_stackAllocNum(d, m->nC);
mjtNum* LDs = mj_stackAllocNum(d, m->nC);
for (int i=0; i < m->nC; i++) {
Ms[i] = d->qM[d->mapM2C[i]];
}

// arbitrary input vector
mjtNum *res = mj_stackAllocNum(d, m->nv);
mjtNum *vec = mj_stackAllocNum(d, m->nv);
for (int i=0; i < m->nv; i++) {
vec[i] = 0.2 + 0.3*i;
}

// benchmark
while (state.KeepRunningBatch(kNumBenchmarkSteps)) {
for (int i=0; i < kNumBenchmarkSteps; i++) {
switch (type) {
case SolveType::kLegacy:
mj_factorI(m, d, d->qM, d->qLD, d->qLDiagInv);
mj_solveM(m, d, res, vec, 1);
break;
case SolveType::kCsr:
mju_copy(LDs, Ms, m->nC);
mj_factorIs(LDs, d->qLDiagInv, m->nv,
d->C_rownnz, d->C_rowadr, m->dof_simplenum, d->C_colind);
mju_copy(res, vec, m->nv);
mj_solveLDs(res, LDs, d->qLDiagInv, m->nv,
d->C_rownnz, d->C_rowadr, m->dof_simplenum, d->C_colind);
}
}
}

// finalize
mj_freeStack(d);
mj_deleteData(d);
mj_deleteModel(m);
state.SetItemsProcessed(state.iterations());
}

void ABSL_ATTRIBUTE_NO_TAIL_CALL BM_solve_LEGACY(benchmark::State& state) {
MujocoErrorTestGuard guard;
BM_solve(state, SolveType::kLegacy);
}
BENCHMARK(BM_solve_LEGACY);

void ABSL_ATTRIBUTE_NO_TAIL_CALL BM_solve_CSR(benchmark::State& state) {
MujocoErrorTestGuard guard;
BM_solve(state, SolveType::kCsr);
}
BENCHMARK(BM_solve_CSR);

} // namespace
} // namespace mujoco
3 changes: 1 addition & 2 deletions test/benchmark/solveLD_benchmark_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ static void BM_solveLD(benchmark::State& state, bool featherstone, bool coil) {
} else {
mju_copy(res, vec, m->nv);
mj_solveLDs(res, LDs, d->qLDiagInv, m->nv,
d->C_rownnz, d->C_rowadr, d->C_diag, m->dof_simplenum,
d->C_colind);
d->C_rownnz, d->C_rowadr, m->dof_simplenum, d->C_colind);
}
}
}
Expand Down
Loading

0 comments on commit ac11e5f

Please sign in to comment.