Sleipnir C++ API
adjoint_expression_graph.hpp
// Copyright (c) Sleipnir contributors

#pragma once

#include <ranges>
#include <utility>

#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>

#include "sleipnir/autodiff/expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"
#include "sleipnir/util/assert.hpp"

namespace slp::detail {

/**
 * This class is an adaptor type that performs value updates of an
 * expression's adjoint graph.
 */
class AdjointExpressionGraph {
 public:
  /**
   * Generates the adjoint graph for the given expression.
   *
   * @param root The root node of the expression.
   */
  explicit AdjointExpressionGraph(const Variable& root)
      : m_top_list{topological_sort(root.expr)} {
    for (const auto& node : m_top_list) {
      m_col_list.emplace_back(node->col);
    }
  }

  /**
   * Updates the values of all nodes in this graph based on the values of
   * their dependent nodes.
   */
  void update_values() { detail::update_values(m_top_list); }

  /**
   * Returns the variable's gradient tree.
   *
   * @param wrt Variables with respect to which to compute the gradient.
   */
  VariableMatrix generate_gradient_tree(const VariableMatrix& wrt) const {
    slp_assert(wrt.cols() == 1);

    // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
    // for background on reverse accumulation automatic differentiation.

    if (m_top_list.empty()) {
      return VariableMatrix{VariableMatrix::empty, wrt.rows(), 1};
    }

    // Set root node's adjoint to 1 since df/df is 1
    m_top_list[0]->adjoint_expr = make_expression_ptr<ConstExpression>(1.0);

    // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
    // multiplied by dy/dx. If there are multiple "paths" from the root node
    // to a variable, the variable's adjoint is the sum of each path's
    // adjoint contribution.
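    // For example, for f = x * y the root's adjoint expression is 1, so this
    // loop gives x the adjoint expression 1 * y and y the adjoint expression
    // 1 * x, which are the symbolic partial derivatives df/dx and df/dy.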
    for (auto& node : m_top_list) {
      auto& lhs = node->args[0];
      auto& rhs = node->args[1];

      if (lhs != nullptr) {
        lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
        if (rhs != nullptr) {
          rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
        }
      }
    }

    // Move gradient tree to return value
    VariableMatrix grad{VariableMatrix::empty, wrt.rows(), 1};
    for (int row = 0; row < grad.rows(); ++row) {
      grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
    }

87
88 // Unlink adjoints to avoid circular references between them and their
89 // parent expressions. This ensures all expressions are returned to the free
90 // list.
91 for (auto& node : m_top_list) {
92 node->adjoint_expr = nullptr;
93 }
94
95 return grad;
96 }
97
  /**
   * Updates the adjoints in the expression graph (computes the gradient),
   * then appends the adjoints of wrt to the sparse matrix triplets.
   *
   * @param triplets The sparse matrix triplets.
   * @param row The row of wrt.
   * @param wrt Variables with respect to which to compute the Jacobian.
   */
  void append_gradient_triplets(
      gch::small_vector<Eigen::Triplet<double>>& triplets, int row,
      const VariableMatrix& wrt) const {
    // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
    // for background on reverse accumulation automatic differentiation.

    // If wrt has fewer nodes than graph, zero wrt's adjoints
    if (static_cast<size_t>(wrt.rows()) < m_top_list.size()) {
      for (const auto& elem : wrt) {
        elem.expr->adjoint = 0.0;
      }
    }

    if (m_top_list.empty()) {
      return;
    }

    // Set root node's adjoint to 1 since df/df is 1
    m_top_list[0]->adjoint = 1.0;

    // Zero the rest of the adjoints
    for (auto& node : m_top_list | std::views::drop(1)) {
      node->adjoint = 0.0;
    }

    // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
    // multiplied by dy/dx. If there are multiple "paths" from the root node
    // to a variable, the variable's adjoint is the sum of each path's
    // adjoint contribution.
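    // For example, for f = x * y with x = 2 and y = 3, the root's adjoint is
    // 1, so this loop assigns x an adjoint of 1 * y = 3 and y an adjoint of
    // 1 * x = 2, which are the numeric partials df/dx and df/dy.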
    for (const auto& node : m_top_list) {
      auto& lhs = node->args[0];
      auto& rhs = node->args[1];

      if (lhs != nullptr) {
        if (rhs != nullptr) {
          lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
          rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
        } else {
          lhs->adjoint += node->grad_l(lhs->val, 0.0, node->adjoint);
        }
      }
    }

    // If wrt has fewer nodes than graph, iterate over wrt
    if (static_cast<size_t>(wrt.rows()) < m_top_list.size()) {
      for (int col = 0; col < wrt.rows(); ++col) {
        const auto& node = wrt[col].expr;

        // Append adjoints of wrt to sparse matrix triplets
        if (node->adjoint != 0.0) {
          triplets.emplace_back(row, col, node->adjoint);
        }
      }
    } else {
      for (const auto& [col, node] : std::views::zip(m_col_list, m_top_list)) {
        // Append adjoints of wrt to sparse matrix triplets
        if (col != -1 && node->adjoint != 0.0) {
          triplets.emplace_back(row, col, node->adjoint);
        }
      }
    }
  }

 private:
  // Topological sort of graph from parent to child
  gch::small_vector<Expression*> m_top_list;

  // List that maps nodes to their respective column
  gch::small_vector<int> m_col_list;
};

}  // namespace slp::detail
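Below is a minimal usage sketch, not part of this header. Since AdjointExpressionGraph lives in the internal slp::detail namespace, user code would normally reach this machinery through Sleipnir's public gradient/Jacobian wrappers; the include path for this header and the slp::Variable calls (set_value(), initializer-list VariableMatrix construction) are assumptions drawn from the library's public API rather than from this file.

// Hedged sketch: drives AdjointExpressionGraph directly, which is an
// internal detail normally wrapped by the public autodiff types.
#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>

#include "sleipnir/autodiff/adjoint_expression_graph.hpp"  // assumed path
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"

int main() {
  slp::Variable x;
  slp::Variable y;
  x.set_value(2.0);
  y.set_value(3.0);

  // f(x, y) = x^2 y
  slp::Variable f = x * x * y;

  // wrt is a column vector of the independent variables
  slp::VariableMatrix wrt{{x}, {y}};

  slp::detail::AdjointExpressionGraph graph{f};

  // Symbolic gradient tree: grad f = [2xy, x^2]^T
  auto grad = graph.generate_gradient_tree(wrt);

  // Numeric gradient appended as row 0 of sparse matrix triplets:
  // (0, 0, 12.0) for df/dx and (0, 1, 4.0) for df/dy
  gch::small_vector<Eigen::Triplet<double>> triplets;
  graph.append_gradient_triplets(triplets, 0, wrt);
}

Because wrt here has fewer rows than the expression graph has nodes, append_gradient_triplets takes its first branch and reads the adjoints directly off the wrt expressions, so no column bookkeeping is needed in this sketch.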