123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2019 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Author: sameeragarwal@google.com (Sameer Agarwal)
- //
- // The LossFunction interface is the way users describe how residuals
- // are converted to cost terms for the overall problem cost function.
- // For the exact manner in which loss functions are converted to the
- // overall cost for a problem, see problem.h.
- //
- // For least squares problem where there are no outliers and standard
- // squared loss is expected, it is not necessary to create a loss
- // function; instead passing a nullptr to the problem when adding
- // residuals implies a standard squared loss.
- //
- // For least squares problems where the minimization may encounter
- // input terms that contain outliers, that is, completely bogus
- // measurements, it is important to use a loss function that reduces
- // their associated penalty.
- //
- // Consider a structure from motion problem. The unknowns are 3D
- // points and camera parameters, and the measurements are image
- // coordinates describing the expected reprojected position for a
- // point in a camera. For example, we want to model the geometry of a
- // street scene with fire hydrants and cars, observed by a moving
- // camera with unknown parameters, and the only 3D points we care
- // about are the pointy tippy-tops of the fire hydrants. Our magic
- // image processing algorithm, which is responsible for producing the
- // measurements that are input to Ceres, has found and matched all
- // such tippy-tops in all image frames, except that in one of the
- // frame it mistook a car's headlight for a hydrant. If we didn't do
- // anything special (i.e. if we used a basic quadratic loss), the
- // residual for the erroneous measurement will result in extreme error
- // due to the quadratic nature of squared loss. This results in the
- // entire solution getting pulled away from the optimum to reduce
- // the large error that would otherwise be attributed to the wrong
- // measurement.
- //
- // Using a robust loss function, the cost for large residuals is
- // reduced. In the example above, this leads to outlier terms getting
- // downweighted so they do not overly influence the final solution.
- //
- // What cost function is best?
- //
- // In general, there isn't a principled way to select a robust loss
- // function. The authors suggest starting with a non-robust cost, then
- // only experimenting with robust loss functions if standard squared
- // loss doesn't work.
- #ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
- #define CERES_PUBLIC_LOSS_FUNCTION_H_
- #include <memory>
- #include "ceres/internal/disable_warnings.h"
- #include "ceres/internal/export.h"
- #include "ceres/types.h"
- #include "glog/logging.h"
- namespace ceres {
- class CERES_EXPORT LossFunction {
- public:
- virtual ~LossFunction();
- // For a residual vector with squared 2-norm 'sq_norm', this method
- // is required to fill in the value and derivatives of the loss
- // function (rho in this example):
- //
- // out[0] = rho(sq_norm),
- // out[1] = rho'(sq_norm),
- // out[2] = rho''(sq_norm),
- //
- // Here the convention is that the contribution of a term to the
- // cost function is given by 1/2 rho(s), where
- //
- // s = ||residuals||^2.
- //
- // Calling the method with a negative value of 's' is an error and
- // the implementations are not required to handle that case.
- //
- // Most sane choices of rho() satisfy:
- //
- // rho(0) = 0,
- // rho'(0) = 1,
- // rho'(s) < 1 in outlier region,
- // rho''(s) < 0 in outlier region,
- //
- // so that they mimic the least squares cost for small residuals.
- virtual void Evaluate(double sq_norm, double out[3]) const = 0;
- };
- // Some common implementations follow below.
- //
- // Note: in the region of interest (i.e. s < 3) we have:
- // TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
- // This corresponds to no robustification.
- //
- // rho(s) = s
- //
- // At s = 0: rho = [0, 1, 0].
- //
- // It is not normally necessary to use this, as passing nullptr for the
- // loss function when building the problem accomplishes the same
- // thing.
- class CERES_EXPORT TrivialLoss final : public LossFunction {
- public:
- void Evaluate(double, double*) const override;
- };
- // Scaling
- // -------
- // Given one robustifier
- // s -> rho(s)
- // one can change the length scale at which robustification takes
- // place, by adding a scale factor 'a' as follows:
- //
- // s -> a^2 rho(s / a^2).
- //
- // The first and second derivatives are:
- //
- // s -> rho'(s / a^2),
- // s -> (1 / a^2) rho''(s / a^2),
- //
- // but the behaviour near s = 0 is the same as the original function,
- // i.e.
- //
- // rho(s) = s + higher order terms,
- // a^2 rho(s / a^2) = s + higher order terms.
- //
- // The scalar 'a' should be positive.
- //
- // The reason for the appearance of squaring is that 'a' is in the
- // units of the residual vector norm whereas 's' is a squared
- // norm. For applications it is more convenient to specify 'a' than
- // its square. The commonly used robustifiers below are described in
- // un-scaled format (a = 1) but their implementations work for any
- // non-zero value of 'a'.
- // Huber.
- //
- // rho(s) = s for s <= 1,
- // rho(s) = 2 sqrt(s) - 1 for s >= 1.
- //
- // At s = 0: rho = [0, 1, 0].
- //
- // The scaling parameter 'a' corresponds to 'delta' on this page:
- // http://en.wikipedia.org/wiki/Huber_Loss_Function
- class CERES_EXPORT HuberLoss final : public LossFunction {
- public:
- explicit HuberLoss(double a) : a_(a), b_(a * a) {}
- void Evaluate(double, double*) const override;
- private:
- const double a_;
- // b = a^2.
- const double b_;
- };
- // Soft L1, similar to Huber but smooth.
- //
- // rho(s) = 2 (sqrt(1 + s) - 1).
- //
- // At s = 0: rho = [0, 1, -1 / (2 * a^2)].
- class CERES_EXPORT SoftLOneLoss final : public LossFunction {
- public:
- explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) {}
- void Evaluate(double, double*) const override;
- private:
- // b = a^2.
- const double b_;
- // c = 1 / a^2.
- const double c_;
- };
- // Inspired by the Cauchy distribution
- //
- // rho(s) = log(1 + s).
- //
- // At s = 0: rho = [0, 1, -1 / a^2].
- class CERES_EXPORT CauchyLoss final : public LossFunction {
- public:
- explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) {}
- void Evaluate(double, double*) const override;
- private:
- // b = a^2.
- const double b_;
- // c = 1 / a^2.
- const double c_;
- };
- // Loss that is capped beyond a certain level using the arc-tangent function.
- // The scaling parameter 'a' determines the level where falloff occurs.
- // For costs much smaller than 'a', the loss function is linear and behaves like
- // TrivialLoss, and for values much larger than 'a' the value asymptotically
- // approaches the constant value of a * PI / 2.
- //
- // rho(s) = a atan(s / a).
- //
- // At s = 0: rho = [0, 1, 0].
- class CERES_EXPORT ArctanLoss final : public LossFunction {
- public:
- explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) {}
- void Evaluate(double, double*) const override;
- private:
- const double a_;
- // b = 1 / a^2.
- const double b_;
- };
- // Loss function that maps to approximately zero cost in a range around the
- // origin, and reverts to linear in error (quadratic in cost) beyond this range.
- // The tolerance parameter 'a' sets the nominal point at which the
- // transition occurs, and the transition size parameter 'b' sets the nominal
- // distance over which most of the transition occurs. Both a and b must be
- // greater than zero, and typically b will be set to a fraction of a.
- // The slope rho'[s] varies smoothly from about 0 at s <= a - b to
- // about 1 at s >= a + b.
- //
- // The term is computed as:
- //
- // rho(s) = b log(1 + exp((s - a) / b)) - c0.
- //
- // where c0 is chosen so that rho(0) == 0
- //
- // c0 = b log(1 + exp(-a / b)
- //
- // This has the following useful properties:
- //
- // rho(s) == 0 for s = 0
- // rho'(s) ~= 0 for s << a - b
- // rho'(s) ~= 1 for s >> a + b
- // rho''(s) > 0 for all s
- //
- // In addition, all derivatives are continuous, and the curvature is
- // concentrated in the range a - b to a + b.
- //
- // At s = 0: rho = [0, ~0, ~0].
- class CERES_EXPORT TolerantLoss final : public LossFunction {
- public:
- explicit TolerantLoss(double a, double b);
- void Evaluate(double, double*) const override;
- private:
- const double a_, b_, c_;
- };
- // This is the Tukey biweight loss function which aggressively
- // attempts to suppress large errors.
- //
- // The term is computed as follows where the equations are scaled by a
- // factor of 2 because the cost function is given by 1/2 rho(s):
- //
- // rho(s) = a^2 / 3 * (1 - (1 - s / a^2)^3 ) for s <= a^2,
- // rho(s) = a^2 / 3 for s > a^2.
- //
- // At s = 0: rho = [0, 1, -2 / a^2]
- class CERES_EXPORT TukeyLoss final : public ceres::LossFunction {
- public:
- explicit TukeyLoss(double a) : a_squared_(a * a) {}
- void Evaluate(double, double*) const override;
- private:
- const double a_squared_;
- };
- // Composition of two loss functions. The error is the result of first
- // evaluating g followed by f to yield the composition f(g(s)).
- // The loss functions must not be nullptr.
- class CERES_EXPORT ComposedLoss final : public LossFunction {
- public:
- explicit ComposedLoss(const LossFunction* f,
- Ownership ownership_f,
- const LossFunction* g,
- Ownership ownership_g);
- ~ComposedLoss() override;
- void Evaluate(double, double*) const override;
- private:
- std::unique_ptr<const LossFunction> f_, g_;
- const Ownership ownership_f_, ownership_g_;
- };
- // The discussion above has to do with length scaling: it affects the space
- // in which s is measured. Sometimes you want to simply scale the output
- // value of the robustifier. For example, you might want to weight
- // different error terms differently (e.g., weight pixel reprojection
- // errors differently from terrain errors).
- //
- // If rho is the wrapped robustifier, then this simply outputs
- // s -> a * rho(s)
- //
- // The first and second derivatives are, not surprisingly
- // s -> a * rho'(s)
- // s -> a * rho''(s)
- //
- // Since we treat the a nullptr Loss function as the Identity loss
- // function, rho = nullptr is a valid input and will result in the input
- // being scaled by a. This provides a simple way of implementing a
- // scaled ResidualBlock.
- class CERES_EXPORT ScaledLoss final : public LossFunction {
- public:
- // Constructs a ScaledLoss wrapping another loss function. Takes
- // ownership of the wrapped loss function or not depending on the
- // ownership parameter.
- ScaledLoss(const LossFunction* rho, double a, Ownership ownership)
- : rho_(rho), a_(a), ownership_(ownership) {}
- ScaledLoss(const ScaledLoss&) = delete;
- void operator=(const ScaledLoss&) = delete;
- ~ScaledLoss() override {
- if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
- rho_.release();
- }
- }
- void Evaluate(double, double*) const override;
- private:
- std::unique_ptr<const LossFunction> rho_;
- const double a_;
- const Ownership ownership_;
- };
- // Sometimes after the optimization problem has been constructed, we
- // wish to mutate the scale of the loss function. For example, when
- // performing estimation from data which has substantial outliers,
- // convergence can be improved by starting out with a large scale,
- // optimizing the problem and then reducing the scale. This can have
- // better convergence behaviour than just using a loss function with a
- // small scale.
- //
- // This templated class allows the user to implement a loss function
- // whose scale can be mutated after an optimization problem has been
- // constructed.
- //
- // Since we treat the a nullptr Loss function as the Identity loss
- // function, rho = nullptr is a valid input.
- //
- // Example usage
- //
- // Problem problem;
- //
- // // Add parameter blocks
- //
- // CostFunction* cost_function =
- // new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
- // new UW_Camera_Mapper(feature_x, feature_y));
- //
- // LossFunctionWrapper* loss_function = new LossFunctionWrapper(
- // new HuberLoss(1.0), TAKE_OWNERSHIP);
- //
- // problem.AddResidualBlock(cost_function, loss_function, parameters);
- //
- // Solver::Options options;
- // Solger::Summary summary;
- //
- // Solve(options, &problem, &summary)
- //
- // loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
- //
- // Solve(options, &problem, &summary)
- //
- class CERES_EXPORT LossFunctionWrapper final : public LossFunction {
- public:
- LossFunctionWrapper(LossFunction* rho, Ownership ownership)
- : rho_(rho), ownership_(ownership) {}
- LossFunctionWrapper(const LossFunctionWrapper&) = delete;
- void operator=(const LossFunctionWrapper&) = delete;
- ~LossFunctionWrapper() override {
- if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
- rho_.release();
- }
- }
- void Evaluate(double sq_norm, double out[3]) const override {
- if (rho_.get() == nullptr) {
- out[0] = sq_norm;
- out[1] = 1.0;
- out[2] = 0.0;
- } else {
- rho_->Evaluate(sq_norm, out);
- }
- }
- void Reset(LossFunction* rho, Ownership ownership) {
- if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
- rho_.release();
- }
- rho_.reset(rho);
- ownership_ = ownership;
- }
- private:
- std::unique_ptr<const LossFunction> rho_;
- Ownership ownership_;
- };
- } // namespace ceres
- #include "ceres/internal/reenable_warnings.h"
- #endif // CERES_PUBLIC_LOSS_FUNCTION_H_
|