classes/MEAL/LevenbergMarquardt_8h_source.shtml

//-*-C++-*-

/***************************************************************************

 *

 *   Copyright (C) 2004 by Willem van Straten

 *   Licensed under the Academic Free License version 2.1

 *

 ***************************************************************************/


// psrchive/More/MEAL/MEAL/LevenbergMarquardt.h


#ifndef __Levenberg_Marquardt_h

#define __Levenberg_Marquardt_h


#include "MEAL/GaussJordan.h"

#include "MEAL/Axis.h"

#include "Estimate.h"

#include "Error.h"

#include "true_math.h"


#include <iostream>

#include <cmath>


namespace MEAL

{

  class RestorePolicy;


      unsigned get_nparam () const;


      double get_param (unsigned iparam) const;


      void set_param (unsigned iparam, double value);


      bool get_infit (unsigned index) const;


      Yt evaluate (std::vector<Gt>* gradient);


    };

    </pre>


    The type of the gradient, Gt, is explicitly specified in the

    declaration of this template class.  The types of At and Yt are

    implicitly specified by the template instantiation of the methods

    of this class.  If Yt or Gt is not of type float or double, there

    must also be defined:


    <pre>

    const Yt operator - (const Yt &, const Yt &);


    const Gt operator * (const Yt &, const Gt &);


    </pre>


    The LevenbergMarquardt class is used in three stages:

    <UL>

    <LI> call to ::init() with data and model

    <LI> repeated calls to ::iter() with data and model, comparing the chisq

    returned in order to determine convergence (or not) of fit

    <LI>call to ::result() to get curvature and covariance matrices

    </UL>

  */

  template <class Grad>

  class LevenbergMarquardt

  {


  public:

    static unsigned verbose;


    LevenbergMarquardt ()

    {

      lamda_increase_factor = 10.0;

      lamda_decrease_factor = 0.1;

      singular_threshold = 1e-8;

      restore_policy = NULL;

    }


    template <class At, class Et, class Mt>

    float init (const std::vector< At >& x,

                const std::vector< Et >& y,

                Mt& model);


    template <class At, class Et, class Mt>

    float iter (const std::vector< At >& x,

                const std::vector< Et >& y,

                Mt& model);


    template <class Mt>

    void result (Mt& model,

                 std::vector<std::vector<double> >& covariance = null_arg,

                 std::vector<std::vector<double> >& curvature = null_arg);


    double get_log_det_curvature() const { return log_det_alpha; }


    unsigned get_nparam_infit() const { return nparam_infit; }


    float lamda;


    float lamda_increase_factor;

    float lamda_decrease_factor;


    float singular_threshold;


    RestorePolicy* restore_policy;


  protected:


    template <class Mt> void solve_delta (const Mt& model);


    template <class At, class Et, class Mt>

    float calculate_chisq (const std::vector< At >& x,

                          const std::vector< Et >& y,

                          Mt& model);


  private:


    std::vector<Grad> gradient;


    std::vector<double> beta;


    std::vector<std::vector<double> > alpha;

    double log_det_alpha = 0.0;


    std::vector<std::vector<double> > delta;


    std::vector<std::string> names;

    std::vector<const char*> name_ptrs;


    float best_chisq = 0;


    std::vector<std::vector<double> > best_alpha;

    std::vector<double> best_beta;


    unsigned nparam_infit = 0;


    std::vector<double> backup;


    static std::vector<std::vector<double> > null_arg;

  };


  //! Policy that applies an abscissa to a model before it is evaluated
  /*! In the general case the abscissa applies itself through its own
      apply() method; the model argument is accepted but not used. */
  template<class At>
  class AbscissaTraits
  {
  public:

    //! Invoke abscissa.apply()
    template<class Mt>
    static void apply (Mt& model, const At& abscissa)
    {
      (void) model;
      abscissa.apply();
    }

  };


  //! Specialization for a plain double abscissa
  /*! A double cannot apply itself, so the value is handed to the model
      through its set_abscissa method. */
  template<>
  class AbscissaTraits<double>
  {
  public:

    //! Invoke model.set_abscissa (abscissa)
    template<class Mt>
    static void apply (Mt& model, double abscissa)
    {
      model.set_abscissa (abscissa);
    }

  };


  //! Interface used by LevenbergMarquardt::iter to checkpoint and roll back state
  class RestorePolicy
  {
  public:

    //! Virtual destructor, so implementations can be destroyed through a base pointer
    virtual ~RestorePolicy () = default;

    //! Called by LevenbergMarquardt::iter after an iteration that lowers chisq
    virtual void store () = 0;

    //! Called by LevenbergMarquardt::iter after an iteration that does not lower chisq
    virtual void restore () = 0;

  };


  //! Weights residuals and gradients by the inverse variance of an estimate
  /*! Et must define val_type and var_type and provide get_value() and
      get_variance(). */
  template<class Et>
  class WeightingScheme
  {
  public:

    using val_type = typename Et::val_type;
    using var_type = typename Et::var_type;

    //! Construct from an estimate, caching the reciprocal of its variance
    WeightingScheme (const Et& estimate)
    { set_variance (estimate.get_variance()); }

    //! Store 1/variance as the weight
    void set_variance (const var_type& variance)
    { inverse_variance = 1.0 / variance; }

    //! Return the measured value minus the model value
    val_type difference (const Et& estimate, const val_type& model)
    { return estimate.get_value() - model; }

    //! Return the square of x
    val_type norm (const val_type& x) const
    { return x*x; }

    //! Return data multiplied by the weight
    val_type get_weighted_conjugate (const val_type& data) const
    { return data * inverse_variance; }

    //! Return the squared datum multiplied by the weight
    float get_weighted_norm (const val_type& data) const
    {
      const val_type squared = norm (data);
      return squared * inverse_variance;
    }

    //! The weight: reciprocal of the variance
    var_type inverse_variance;

  };


  //! Weighting scheme for complex-valued estimates
  /*! The real and imaginary parts are weighted independently, each by the
      reciprocal of its own variance. */
  template<class Et>
  class WeightingScheme< std::complex<Et> >
  {

  public:

    using type = std::complex<Et>;
    using val_type = std::complex<typename Et::val_type>;
    using var_type = typename Et::var_type;

    //! Construct from a complex estimate, caching both weights
    WeightingScheme (const type& estimate)
    { set_variance (estimate); }

    //! Store 1/variance of the real and imaginary parts
    void set_variance (const type& estimate)
    {
      inv_var_real = 1.0 / estimate.real().get_variance();
      inv_var_imag = 1.0 / estimate.imag().get_variance();
    }

    //! Return the measured complex value minus the model value
    val_type difference (const type& estimate, const val_type& model)
    {
      return val_type (estimate.real().get_value(),
                       estimate.imag().get_value()) - model;
    }

    //! Return std::norm(x), converted to val_type
    val_type norm (const val_type& x) const
    { return std::norm(x); }

    //! Return the complex conjugate of data with each part weighted separately
    val_type get_weighted_conjugate (const val_type& data) const
    {
      return val_type (data.real()*inv_var_real,
                       -data.imag()*inv_var_imag);
    }

    //! Return the sum of the squared parts, each multiplied by its own weight
    float get_weighted_norm (const val_type& data) const
    {
      const typename Et::val_type re = data.real();
      const typename Et::val_type im = data.imag();
      return re*re*inv_var_real + im*im*inv_var_imag;
    }

    //! Weight of the real part
    var_type inv_var_real;

    //! Weight of the imaginary part
    var_type inv_var_imag;

  };


  //! Add the contribution of a single datum to alpha and beta
  /*! As defined later in this file: applies the abscissa to the model via
      AbscissaTraits, constructs a WeightingScheme from the datum, evaluates
      the model and its gradient, and passes the difference to lmcoff1.
      \return the value returned by lmcoff1 */
  template <class Mt, class At, class Et, class Grad>

  float lmcoff (// input

                Mt& model,

                const At& abscissa,

                const Et& data,

                // storage

                std::vector<Grad>& gradient,

                // output

                std::vector<std::vector<double> >& alpha,

                std::vector<double>& beta);


  //! Add a weighted residual and its gradient to alpha and beta
  /*! As defined later in this file: for every parameter in the fit, adds to
      beta and to the lower triangle of alpha.
      \return weighting_scheme.get_weighted_norm (delta_data) */
  template <class Mt, class Yt, class Wt, class Grad>

  float lmcoff1 (// input

                 Mt& model,

                 const Yt& delta_data,

                 const Wt& weighting_scheme,

                 const std::vector<Grad>& gradient,

                 // output

                 std::vector<std::vector<double> >& alpha,

                 std::vector<double>& beta);


  //! Return the name of the iparam'th parameter that is in the fit
  /*! As defined later in this file, returns "unknown" when fewer than
      iparam+1 parameters are in the fit. */
  template<class Mt>

  std::string get_name (const Mt& model, unsigned iparam);


}


// Definition of the static sentinel; result() compares the address of each
// output argument against it to decide whether that output was requested.
template <class Grad>

std::vector<std::vector<double> > MEAL::LevenbergMarquardt<Grad>::null_arg;


// Definition of the static verbosity level; zero disables all diagnostic
// output that is conditional on verbose in this file.
template <class Grad>

unsigned MEAL::LevenbergMarquardt<Grad>::verbose = 0;


// Prepare for a new fit: size every working array to the number of model
// parameters, compute the chisq (and alpha, beta) of the starting model,
// record them as the best so far, and reset lamda to 0.001.
// Returns the initial chisq.
template <class Grad>
template <class At, class Et, class Mt>
float MEAL::LevenbergMarquardt<Grad>::init
(const std::vector< At >& x,
 const std::vector< Et >& y,
 Mt& model)
{
  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::init" << std::endl;

  // every working array has one entry (or row) per model parameter
  const auto nparam = model.get_nparam();

  alpha.resize (nparam);
  beta.resize (nparam);
  delta.resize (nparam);
  backup.resize (nparam);
  names.resize (nparam);
  name_ptrs.resize (nparam);

  // alpha is square; delta is a single column
  for (auto& row : alpha)
    row.resize (nparam);

  for (auto& column : delta)
    column.resize (1);

  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::init calculate chisq" << std::endl;

  // the starting model defines the first "best" solution
  best_chisq = calculate_chisq (x, y, model);
  best_alpha = alpha;
  best_beta = beta;

  lamda = 0.001;

  if (verbose > 0)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::init chisq=" << best_chisq << std::endl;

  return best_chisq;
}


// Diagnostic: report to std::cerr any rows of alpha that are zero, and any
// pair of rows whose normalized dot product exceeds 0.8, naming the model
// parameters involved.  Rows are taken to correspond, in order, to the model
// parameters for which get_infit is true.  Stops at the first non-finite
// normalized dot product.
template<class T>
void verify_orthogonal (const std::vector<std::vector<double > >& alpha, const T& model)
{
  if (alpha.empty())
    return;

  const unsigned nparam = model.get_nparam ();

  // map each row of alpha to the name and index of a free parameter
  std::vector<std::string> names;
  std::vector<unsigned> param_index;

  for (unsigned iparam=0; iparam < nparam; iparam++)
  {
    if (!model.get_infit(iparam))
      continue;

    names.push_back (model.get_param_name(iparam));
    param_index.push_back (iparam);
  }

  const unsigned nfree = names.size();

  // length of each row vector
  std::vector<double> row_mod (nfree, 0.0);

  for (unsigned irow=0; irow<nfree; irow++)
  {
    const std::vector<double>& row = alpha[irow];

    double sum_sq = 0.0;
    for (unsigned jcol=0; jcol<nfree; jcol++)
      sum_sq += row[jcol] * row[jcol];

    row_mod[irow] = sqrt(sum_sq);

    if (row_mod[irow] == 0)
      std::cerr << irow << "=" << names[irow] << " gradient = 0" << std::endl;
  }

  // compare every pair of non-zero rows
  for (unsigned krow=0; krow<nfree; krow++)
  {
    if (row_mod[krow] == 0)
      continue;

    for (unsigned irow=krow+1; irow<nfree; irow++)
    {
      if (row_mod[irow] == 0)
        continue;

      double dot = 0.0;
      for (unsigned jcol=0; jcol<nfree; jcol++)
        dot += alpha[krow][jcol] * alpha[irow][jcol];

      const double degen = dot / (row_mod[krow] * row_mod[irow]);

      if (degen > 0.8)
      {
        const double ival = model.get_param(param_index[irow]);
        const double kval = model.get_param(param_index[krow]);

        std::cerr << "degen(" << names[krow] << "," << names[irow] << ") = "
                  << degen << std::endl
                  << "\t" << names[krow] << " = " << kval << std::endl
                  << "\t" << names[irow] << " = " << ival << std::endl;
      }

      if (!true_math::finite(degen))
      {
        std::cerr << "NaN or Inf in curvature matrix" << std::endl;
        return;
      }
    }
  }
}


// Return the name of the iparam'th parameter for which model.get_infit is
// true (counting from zero), or "unknown" if there is no such parameter.
template<typename Mt>
std::string MEAL::get_name (const Mt& model, unsigned iparam)
{
  // number of free parameters still to be skipped before the one requested
  unsigned remaining = iparam;

  for (unsigned i=0; i < model.get_nparam(); i++)
  {
    if (!model.get_infit(i))
      continue;

    if (remaining == 0)
      return model.get_param_name(i);

    remaining --;
  }

  return "unknown";
}


// /////////////////////////////////////////////////////////////////////////

// MEAL::LevenbergMarquardt<Grad>::solve_delta

// /////////////////////////////////////////////////////////////////////////


// Build and solve the damped linear system for the parameters in the fit.
//
// The rows and columns of best_alpha and the entries of best_beta that belong
// to parameters in the fit are packed into the top-left corner of alpha and
// the first column of delta; the diagonal of alpha is scaled by (1 + lamda);
// and the system is passed to MEAL::GaussJordan, whose return value is stored
// in log_det_alpha.  iter() then reads the parameter step from delta and
// result() reads the covariance from alpha.
//
// Throws Error (InvalidState) if alpha has not been sized for this model or
// if no parameter is in the fit.  An Error thrown by GaussJordan is re-thrown
// with this function name appended; when verbose > 0, verify_orthogonal is
// first run on a copy of the system to help identify degenerate parameters.
template <class Grad>
template <class Mt>
void MEAL::LevenbergMarquardt<Grad>::solve_delta (const Mt& model)
{
  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::solve_delta" << std::endl;

  if (alpha.size() != model.get_nparam())
  {
    // the sizes are cast so that the argument types match the %u conversions
    throw Error (InvalidState,
                 "MEAL::LevenbergMarquardt<Grad>::solve_delta",
                 "alpha.size=%u != model.nparam=%u",
                 static_cast<unsigned>(alpha.size()),
                 static_cast<unsigned>(model.get_nparam()));
  }

  if (verbose > 0)
  {
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::solve_delta lamda="
              << lamda << " nparam=" << model.get_nparam() << std::endl;
  }

  // iinfit and jinfit index the packed system; ifit and jfit index the model
  unsigned iinfit = 0;
  for (unsigned ifit=0; ifit<model.get_nparam(); ifit++)
  {
    if (verbose > 0)
      std::cerr << "MEAL::LevenbergMarquardt<Grad>::solve_delta i=" << ifit
                << " " << model.get_param_name(ifit);

    if (model.get_infit(ifit))
    {
      if (verbose > 0)
        std::cerr << " in fit" << std::endl;

      unsigned jinfit = 0;
      for (unsigned jfit=0; jfit<model.get_nparam(); jfit++)
      {
        if (model.get_infit(jfit))
        {
          alpha[iinfit][jinfit]=best_alpha[ifit][jfit];
          jinfit ++;
        }
      }

      // damp the diagonal
      alpha[iinfit][iinfit] *= (1.0 + lamda);
      delta[iinfit][0]=best_beta[ifit];

      // names is not resized here, so the c_str() pointers remain valid
      names[iinfit] = model.get_param_name(ifit);
      name_ptrs[iinfit] = names[iinfit].c_str();

      iinfit ++;
    }
    else if (verbose > 0)
      std::cerr << " fixed" << std::endl;
  }

  if (iinfit == 0)
    throw Error (InvalidState, "MEAL::LevenbergMarquardt<Grad>::solve_delta", "no parameters in fit");

  nparam_infit = iinfit;

  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::solve_delta for " << iinfit << " parameters" << std::endl;

  // the untouched system is needed only by the verbose diagnostic below,
  // so it is copied only when that diagnostic can run
  std::vector<std::vector<double> > temp_copy;
  if (verbose > 0)
    temp_copy = alpha;

  try
  {
    // invert Equation 15.5.14
    log_det_alpha = MEAL::GaussJordan (alpha, delta, iinfit, singular_threshold, &name_ptrs);
  }
  catch (Error& error)
  {
    if (verbose > 0)
      verify_orthogonal (temp_copy, model);
    throw error += "MEAL::LevenbergMarquardt<Grad>::solve_delta";
  }

  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::solve_delta exit" << std::endl;
}


// /////////////////////////////////////////////////////////////////////////

// MEAL::LevenbergMarquardt<Grad>::iter

// /////////////////////////////////////////////////////////////////////////


// Perform one Levenberg-Marquardt iteration.
//
// Solves for the parameter step, applies it to the model (saving the previous
// values in backup) and recomputes chisq.  If the new chisq is lower than the
// best so far, the step is kept, lamda is multiplied by lamda_decrease_factor
// and the new chisq, alpha and beta become the best; otherwise lamda is
// multiplied by lamda_increase_factor and the saved parameter values are
// written back.  The restore_policy, if set, is told to store() or restore()
// accordingly.
//
// Returns the chisq of the trial step, whether or not it was kept.
template <class Grad>
template <class At, class Et, class Mt>
float MEAL::LevenbergMarquardt<Grad>::iter
( const std::vector< At >& x,
  const std::vector< Et >& y,
  Mt& model )
{
  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::iter" << std::endl;

  // on return, delta holds the step for each parameter in the fit
  solve_delta (model);

  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::iter update model" << std::endl;

  // next_free walks the packed solution while iparm walks the model
  unsigned next_free = 0;
  for (unsigned iparm=0; iparm<model.get_nparam(); iparm++)
  {
    // parameters that are not in the fit do not move
    const double step = model.get_infit(iparm) ? delta[next_free++][0] : 0.0;

    backup[iparm] = model.get_param (iparm);

    if (verbose > 2)
      std::cerr << "   delta[" << iparm << "]=" << step << std::endl;

    model.set_param (iparm, backup[iparm] + step);
  }

  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::iter calculate new chisq" << std::endl;

  float new_chisq = calculate_chisq (x, y, model);

  const bool improved = new_chisq < best_chisq;

  // relax the damping after a success, tighten it after a failure
  lamda *= improved ? lamda_decrease_factor : lamda_increase_factor;

  if (verbose)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::iter new chisq="
              << new_chisq
              << (improved ? "\n  better fit; lamda=" : "\n  worse fit; lamda=")
              << lamda << std::endl;

  if (improved)
  {
    if (restore_policy)
      restore_policy->store ();

    best_chisq = new_chisq;
    best_alpha = alpha;
    best_beta  = beta;
  }
  else
  {
    if (restore_policy)
      restore_policy->restore ();

    // put the model back the way it was before the trial step
    for (unsigned iparm=0; iparm<model.get_nparam(); iparm++)
      model.set_param (iparm, backup[iparm]);
  }

  return new_chisq;
}


// /////////////////////////////////////////////////////////////////////////

// MEAL::LevenbergMarquardt<Grad>::result

// /////////////////////////////////////////////////////////////////////////


// Retrieve the results of the fit.
//
// If curve is not the null_arg sentinel it receives a copy of best_alpha.
// lamda is then set to zero and the system is solved once more.  If covar is
// not the sentinel it is sized nparam x nparam and filled from the solved
// alpha: entries for which both parameters are in the fit are copied from the
// packed matrix, all others are zero.
template <class Grad>
template <class Mt>
void MEAL::LevenbergMarquardt<Grad>::result
( Mt& model,
  std::vector<std::vector<double> >& covar,
  std::vector<std::vector<double> >& curve )
{
  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::result" << std::endl;

  const bool want_curvature = (&curve != &null_arg);
  if (want_curvature)
    curve = best_alpha;

  // solve without damping
  lamda = 0.0;
  solve_delta (model);

  const bool want_covariance = (&covar != &null_arg);
  if (!want_covariance)
    return;

  const auto nparam = model.get_nparam();
  covar.resize (nparam);

  // packed_row and packed_col index the solved alpha
  unsigned packed_row = 0;
  for (unsigned idim=0; idim < nparam; idim++)
  {
    std::vector<double>& out = covar[idim];

    // start from zero; only entries between fitted parameters are filled
    out.assign (nparam, 0.0);

    if (!model.get_infit(idim))
      continue;

    unsigned packed_col = 0;
    for (unsigned jdim=0; jdim < nparam; jdim++)
    {
      if (model.get_infit(jdim))
      {
        out[jdim] = alpha [packed_row][packed_col];
        packed_col ++;
      }
    }

    packed_row ++;
  }
}


// /////////////////////////////////////////////////////////////////////////

// MEAL::LevenbergMarquardt<Grad>::chisq

// /////////////////////////////////////////////////////////////////////////


// Compute chisq and accumulate the curvature matrix and gradient vector.
//
// The lower triangle of alpha and all of beta are zeroed, lmcoff is called
// for each of the x.size() data points, and the lower triangle of alpha is
// finally mirrored into the upper triangle.
//
// Throws Error (InvalidState) if alpha has not been sized for this model and
// Error (InvalidParam) if y has fewer elements than x.
//
// Returns the sum of the values returned by lmcoff.
template <class Grad>
template <class At, class Et, class Mt>
float MEAL::LevenbergMarquardt<Grad>::calculate_chisq
(const std::vector< At >& x,
 const std::vector< Et >& y,
 Mt& model)
{
  if (verbose > 2)
    std::cerr << "MEAL::LevenbergMarquardt<Grad>::chisq nparam="
              << model.get_nparam() << std::endl;

  // the sizes are cast so that the argument types match the %u conversions
  if (alpha.size() != model.get_nparam())
    throw Error (InvalidState, "MEAL::LevenbergMarquardt<Grad>::chisq",
                 "alpha.size=%u != model.nparam=%u",
                 static_cast<unsigned>(alpha.size()),
                 static_cast<unsigned>(model.get_nparam()));

  if (y.size() < x.size())
    throw Error (InvalidParam, "MEAL::LevenbergMarquardt<Grad>::chisq",
                 "y.size=%u < x.size=%u",
                 static_cast<unsigned>(y.size()),
                 static_cast<unsigned>(x.size()));

  // initialize sums; only the lower triangle of alpha is accumulated
  double Chisq = 0.0;
  for (unsigned j=0; j<alpha.size(); j++)
  {
    for (unsigned k=0; k<=j; k++)
      alpha[j][k] = 0.0;
    beta[j] = 0.0;
  }

  for (unsigned ipt=0; ipt < x.size(); ipt++)
  {
    if (verbose > 2)
      std::cerr << "MEAL::LevenbergMarquardt<Grad>::chisq lmcoff[" << ipt << "/" << x.size() << "]" << std::endl;

    Chisq += lmcoff (model, x[ipt], y[ipt], gradient, alpha, beta);
  }

  // populate the symmetric half of the curvature matrix
  for (unsigned ifit=1; ifit<model.get_nparam(); ifit++)
    for (unsigned jfit=0; jfit<ifit; jfit++)
      alpha[jfit][ifit]=alpha[ifit][jfit];

  return Chisq;
}


// Add the contribution of one datum to alpha and beta and return the value
// computed by lmcoff1 for it.
//
// The whole body is a function-try-block: any Error thrown while processing
// the datum has the datum and the current model value appended to its message
// and this function name added before it is re-thrown.
template <class Mt, class At, class Et, class Grad>

float MEAL::lmcoff (

                    // input

                    Mt& model,

                    const At& abscissa,

                    const Et& data,

                    // storage

                    std::vector<Grad>& gradient,

                    // output

                    std::vector<std::vector<double> >& alpha,

                    std::vector<double>& beta

                    )

try

{

  if (LevenbergMarquardt<Grad>::verbose > 2)

    std::cerr << "MEAL::lmcoff data=" << data << std::endl;


  // set the model to the abscissa at which this datum was measured
  AbscissaTraits<At>::apply (model, abscissa);


  if (LevenbergMarquardt<Grad>::verbose > 2)

    std::cerr << "MEAL::lmcoff abscissa applied" << std::endl;


  // the weights are derived from the datum itself
  WeightingScheme<Et> weight (data);


  // model.evaluate fills gradient as a side effect; lmcoff1 receives the same
  // vector by reference and reads it after the call has begun
  float result = lmcoff1 (model,

                          weight.difference (data, model.evaluate (&gradient)),

                          weight, gradient, alpha, beta);


  if (LevenbergMarquardt<Grad>::verbose > 2)

    std::cerr << "MEAL::lmcoff lmcoff1 computed" << std::endl;


  return result;

}

catch (Error& error)

{

  // note that the model is evaluated again here, without a gradient argument
  error << "\n\t" "data=" << data << " model=" << model.evaluate ();

  throw error += "MEAL::lmcoff";

}


template <class Mt, class Yt, class Wt, class Grad>

float MEAL::lmcoff1 (

                     // input

                     Mt& model,

                     const Yt& delta_y,

                     const Wt& weight,

                     const std::vector<Grad>& gradient,

                     // output

                     std::vector<std::vector<double> >& alpha,

                     std::vector<double>& beta

                     )

try

{

  ElementTraits<Grad> traits;


  if (LevenbergMarquardt<Grad>::verbose > 2)

    std::cerr << "MEAL::lmcoff1 delta_y=" << delta_y << std::endl;


  Yt w_delta_y = weight.get_weighted_conjugate (delta_y);


  for (unsigned ifit=0; ifit < model.get_nparam(); ifit++)

  {

    if (model.get_infit(ifit))

    {

      double term = traits.to_real (w_delta_y * gradient[ifit]);

      if (!true_math::finite(term))

        throw Error (InvalidState, "MEAL::lmcoff1"

                     "non-finite contribution to beta");


      // Equation 15.5.6 (with 15.5.8)

      beta[ifit] += term;


      if (LevenbergMarquardt<Grad>::verbose > 2)

        std::cerr << "MEAL::lmcoff1 compute weighted conjugate of gradient"

                     "[" << ifit << "]" << std::endl;


      Grad w_gradient = weight.get_weighted_conjugate (gradient[ifit]);


      if (LevenbergMarquardt<Grad>::verbose > 2)

        std::cerr << "MEAL::lmcoff1 add to curvature matrix" << std::endl;


      // Equation 15.5.11 of NR

      for (unsigned jfit=0; jfit <= ifit; jfit++)

      {

        if (model.get_infit(jfit))

        {

          double term = traits.to_real (w_gradient * gradient[jfit]);

          if (!true_math::finite(term))

            throw Error (InvalidState, "MEAL::lmcoff1", "non-finite contribution to alpha");


          alpha[ifit][jfit] += term;

        }

      }

    }

  }


  // Equation 15.5.5

  float chisq = weight.get_weighted_norm (delta_y);


  if (LevenbergMarquardt<Grad>::verbose > 1 || !true_math::finite(chisq))

    std::cerr << "MEAL::lmcoff1 chisq=" << chisq << std::endl;


  return chisq;

}

catch (Error& error)

{

  error << "\n\t" "delta_y=" << delta_y;

  throw error += "MEAL::lmcoff1";

}


#endif

Error

FTransform::type
type

MEAL
Namespace in which all modeling and calibration related code is declared.
Definition ExampleComplex2.h:16

MEAL::GaussJordan
T GaussJordan(std::vector< std::vector< T > > &a, std::vector< std::vector< U > > &b, int nrow=-1, double singular_threshold=0.0, std::vector< const char * > *names=0)
Definition GaussJordan.h:44

MEAL::sqrt
const ScalarMath sqrt(const ScalarMath &x)
Return a ScalarMath instance representing x^.5.
Definition ScalarMath.C:151

Reference::verbose
bool verbose