Sleipnir C++ API
gradient_expression_graph.hpp
// Copyright (c) Sleipnir contributors

#pragma once

#include <ranges>
#include <utility>

#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>

#include "sleipnir/autodiff/expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"
#include "sleipnir/util/assert.hpp"
#include "sleipnir/util/empty.hpp"

namespace slp::detail {

/**
 * Caches an expression's computational graph in a topologically sorted list
 * and generates gradients from it via reverse accumulation automatic
 * differentiation.
 *
 * @tparam Scalar Scalar type.
 */
template <typename Scalar>
class GradientExpressionGraph {
 public:
  /**
   * Constructs a GradientExpressionGraph from an expression's root node.
   *
   * @param root The root node of the expression.
   */
  explicit GradientExpressionGraph(const Variable<Scalar>& root)
      : m_top_list{topological_sort(root.expr)} {
    for (const auto& node : m_top_list) {
      m_col_list.emplace_back(node->col);
    }
  }

  /**
   * Updates the values of all nodes in this computational graph in
   * topological order.
   */
  void update_values() { detail::update_values(m_top_list); }

  /**
   * Returns the gradient of the root expression with respect to the given
   * variables as a tree of expressions.
   *
   * @param wrt Column vector of variables with respect to which to compute
   *     the gradient.
   * @return The gradient as a column vector of expression trees.
   */
  VariableMatrix<Scalar> generate_tree(
      const VariableMatrix<Scalar>& wrt) const {
    slp_assert(wrt.cols() == 1);

    // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
    // for background on reverse accumulation automatic differentiation.

    if (m_top_list.empty()) {
      return VariableMatrix<Scalar>{detail::empty, wrt.rows(), 1};
    }

    // Set root node's adjoint to 1 since df/df is 1
    m_top_list[0]->adjoint_expr = constant_ptr(Scalar(1));

    // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
    // multiplied by dy/dx. If there are multiple "paths" from the root node
    // to a variable, the variable's adjoint is the sum of each path's adjoint
    // contribution.
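    // For example, if f(x) = x * x, both arguments of the multiply node are
    // the same variable x, so x accumulates an adjoint of 1 * x from each
    // path for a total of 2x, matching df/dx.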
    for (auto& node : m_top_list) {
      auto& lhs = node->args[0];
      auto& rhs = node->args[1];

      if (lhs != nullptr) {
        if (rhs != nullptr) {
          // Binary operator
          lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
          rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
        } else {
          // Unary operator
          lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
        }
      }
    }

    // Move gradient tree to return value
    VariableMatrix<Scalar> grad{detail::empty, wrt.rows(), 1};
    for (int row = 0; row < grad.rows(); ++row) {
      grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
    }

    // Unlink adjoints to avoid circular references between them and their
    // parent expressions. This ensures all expressions are returned to the
    // free list.
    for (auto& node : m_top_list) {
      node->adjoint_expr = nullptr;
    }

    return grad;
  }

  /**
   * Updates the adjoints in this computational graph, then appends the
   * nonzero adjoints of wrt to a list of sparse matrix triplets.
   *
   * @param triplets The sparse matrix triplets to which to append.
   * @param row The sparse matrix row in which to place the adjoints.
   * @param wrt Column vector of variables with respect to which to compute
   *     the gradient.
   */
  void append_triplets(gch::small_vector<Eigen::Triplet<Scalar>>& triplets,
                       int row, const VariableMatrix<Scalar>& wrt) const {
    slp_assert(wrt.cols() == 1);

    // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
    // for background on reverse accumulation automatic differentiation.

    // If wrt has fewer nodes than the graph, zero wrt's adjoints up front;
    // wrt variables the root expression doesn't depend on aren't in
    // m_top_list, so the loops below would otherwise leave them with stale
    // adjoints
    if (static_cast<size_t>(wrt.rows()) < m_top_list.size()) {
      for (const auto& elem : wrt) {
        elem.expr->adjoint = Scalar(0);
      }
    }

    if (m_top_list.empty()) {
      return;
    }

    // Set root node's adjoint to 1 since df/df is 1
    m_top_list[0]->adjoint = Scalar(1);

    // Zero the rest of the adjoints
    for (auto& node : m_top_list | std::views::drop(1)) {
      node->adjoint = Scalar(0);
    }

    // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
    // multiplied by dy/dx. If there are multiple "paths" from the root node
    // to a variable, the variable's adjoint is the sum of each path's adjoint
    // contribution.
    for (const auto& node : m_top_list) {
      auto& lhs = node->args[0];
      auto& rhs = node->args[1];

      if (lhs != nullptr) {
        if (rhs != nullptr) {
          // Binary operator
          lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
          rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
        } else {
          // Unary operator
          lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint);
        }
      }
    }

    // If wrt has fewer nodes than the graph, iterate over wrt
    if (static_cast<size_t>(wrt.rows()) < m_top_list.size()) {
      for (int col = 0; col < wrt.rows(); ++col) {
        const auto& node = wrt[col].expr;

        // Append adjoints of wrt to sparse matrix triplets
        if (node->adjoint != Scalar(0)) {
          triplets.emplace_back(row, col, node->adjoint);
        }
      }
    } else {
      for (const auto& [col, node] : std::views::zip(m_col_list, m_top_list)) {
        // Append adjoints of wrt to sparse matrix triplets
        if (col != -1 && node->adjoint != Scalar(0)) {
          triplets.emplace_back(row, col, node->adjoint);
        }
      }
    }
  }

 private:
  // Topologically sorted list of the graph's nodes, from root to leaves
  gch::small_vector<Expression<Scalar>*> m_top_list;

  // Column index in wrt of each node in m_top_list (-1 if the node isn't a
  // wrt variable)
  gch::small_vector<int> m_col_list;
};

} // namespace slp::detail
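
Below is a minimal usage sketch, not part of the header above. It assumes
Sleipnir's public Variable/VariableMatrix API (set_value, overloaded
arithmetic operators, and a VariableMatrix(rows, cols) constructor) and the
include path "sleipnir/autodiff/gradient_expression_graph.hpp"; in the
library itself, higher-level types like slp::Gradient and slp::Jacobian are
the intended entry points to this detail class.

#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>

#include "sleipnir/autodiff/gradient_expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"

void gradient_example() {
  // Decision variables x₀ = 3, x₁ = 4 (set_value is assumed public API)
  slp::VariableMatrix<double> x{2, 1};
  x[0].set_value(3.0);
  x[1].set_value(4.0);

  // f(x) = x₀² + x₀x₁
  slp::Variable<double> f = x[0] * x[0] + x[0] * x[1];

  slp::detail::GradientExpressionGraph<double> graph{f};

  // Symbolic gradient: a column vector of expression trees for ∂f/∂x₀ and
  // ∂f/∂x₁
  auto grad = graph.generate_tree(x);

  // Numeric gradient row: refresh node values, then gather nonzero adjoints
  // as triplets and assemble a 1×2 sparse row
  graph.update_values();
  gch::small_vector<Eigen::Triplet<double>> triplets;
  graph.append_triplets(triplets, 0, x);

  Eigen::SparseMatrix<double> g{1, x.rows()};
  g.setFromTriplets(triplets.begin(), triplets.end());
  // Expected: g = [2x₀ + x₁, x₀] = [10, 3]
}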