// Copyright (C) 2007 Davis E. King (davisking@users.sourceforge.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_SVm_
#define DLIB_SVm_
#include "svm_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
#include "../serialize.h"
#include "../rand.h"
#include "../std_allocator.h"
#include "function.h"
#include "kernel.h"
#include "../enable_if.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class invalid_svm_nu_error : public dlib::error
{
public:
invalid_svm_nu_error(const std::string& msg, double nu_) : dlib::error(msg), nu(nu_) {};
const double nu;
};
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename T::type maximum_nu_impl (
const T& y
)
{
typedef typename T::type scalar_type;
// make sure requires clause is not broken
DLIB_ASSERT(y.nr() > 1 && y.nc() == 1,
"\ttypedef T::type maximum_nu(y)"
<< "\n\ty should be a column vector with more than one entry"
<< "\n\ty.nr(): " << y.nr()
<< "\n\ty.nc(): " << y.nc()
);
long pos_count = 0;
long neg_count = 0;
for (long r = 0; r < y.nr(); ++r)
{
if (y(r) == 1.0)
{
++pos_count;
}
else if (y(r) == -1.0)
{
++neg_count;
}
else
{
// make sure requires clause is not broken
DLIB_ASSERT(y(r) == -1.0 || y(r) == 1.0,
"\ttypedef T::type maximum_nu(y)"
<< "\n\ty should contain only 1 and 0 entries"
<< "\n\tr: " << r
<< "\n\ty(r): " << y(r)
);
}
}
return static_cast<scalar_type>(2.0*(scalar_type)std::min(pos_count,neg_count)/(scalar_type)y.nr());
}
template <
typename T
>
typename T::type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
template <
typename T
>
typename T::value_type maximum_nu (
const T& y
)
{
return maximum_nu_impl(vector_to_matrix(y));
}
// ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
bool is_binary_classification_problem_impl (
const T& x,
const U& x_labels
)
{
if (x.nc() != 1 || x_labels.nc() != 1) return false;
if (x.nr() != x_labels.nr()) return false;
if (x.nr() <= 1) return false;
for (long r = 0; r < x_labels.nr(); ++r)
{
if (x_labels(r) != -1 && x_labels(r) != 1)
return false;
}
return true;
}
template <
typename T,
typename U
>
bool is_binary_classification_problem (
const T& x,
const U& x_labels
)
{
return is_binary_classification_problem_impl(vector_to_matrix(x), vector_to_matrix(x_labels));
}
// ----------------------------------------------------------------------------------------
template <
typename K,
typename sample_vector_type,
typename scalar_vector_type
>
class kernel_matrix_cache
{
typedef typename K::scalar_type scalar_type;
typedef typename K::sample_type sample_type;
typedef typename K::mem_manager_type mem_manager_type;
const sample_vector_type& x;
const scalar_vector_type& y;
K kernel_function;
mutable matrix<scalar_type,0,0,mem_manager_type> cache;
mutable matrix<scalar_type,0,1,mem_manager_type> diag_cache;
mutable matrix<long,0,1,mem_manager_type> lookup;
mutable matrix<long,0,1,mem_manager_type> rlookup;
mutable long next;
/*!
INITIAL VALUE
- for all valid x:
- lookup(x) == -1
- rlookup(x) == -1
CONVENTION
- if (lookup(c) != -1) then
- cache(lookup(c),*) == the cached column c of the kernel matrix
- rlookup(lookup(c)) == c
- if (rlookup(x) != -1) then
- lookup(rlookup(x)) == x
- cache(x,*) == the cached column rlookup(x) of the kernel matrix
- next == the next row in the cache table to use to cache something
!*/
public:
kernel_matrix_cache (
const sample_vector_type& x_,
const scalar_vector_type& y_,
K kernel_function_,
long max_size_megabytes
) : x(x_), y(y_), kernel_function(kernel_function_)
{
// figure out how many rows of the kernel matrix we can have
// with the given amount of memory.
long max_size = (max_size_megabytes*1024*1024)/(x.nr()*sizeof(scalar_type));
// don't let it be 0
if (max_size == 0)
max_size = 1;
long size = std::min(max_size,x.nr());
diag_cache.set_size(x.nr(),1);
cache.set_size(size,x.nr());
lookup.set_size(x.nr(),1);
rlookup.set_size(size,1);
set_all_elements(lookup,-1);
set_all_elements(rlookup,-1);
next = 0;
for (long i = 0; i < diag_cache.nr(); ++i)
diag_cache(i) = kernel_function(x(i),x(i));
}
inline bool is_cached (
long r
) const
{
return (lookup(r) != -1);
}
inline scalar_type operator () (
long r,
long c
) const
{
if (lookup(c) != -1)
{
return cache(lookup(c),r);
}
else if (r == c)
{
return diag_cache(r);
}
else if (lookup(r) != -1)
{
// the kernel is symmetric so this is legit
return cache(lookup(r),c);
}
else
{
// if the lookup table is pointing to cache(next,*) then clear lookup(next)
if (rlookup(next) != -1)
lookup(rlookup(next)) = -1;
// make the lookup table os that it says c is now cached at the spot indicated by next
lookup(c) = next;
rlookup(next) = c;
// compute this column in the kernel matrix and store it in the cache
for (long i = 0; i < cache.nc(); ++i)
cache(next,i) = y(c)*y(i)*kernel_function(x(c),x(i));
scalar_type val = cache(next,r);
next = (next + 1)%cache.nr();
return val;
}
}
};
// ----------------------------------------------------------------------------------------
template <
typename dec_funct_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const matrix<typename dec_funct_type::scalar_type, 1, 2, typename dec_funct_type::mem_manager_type>
test_binary_decision_function_impl (
const dec_funct_type& dec_funct,
const in_sample_vector_type& x_test,
const in_scalar_vector_type& y_test
)
{
typedef typename dec_funct_type::scalar_type scalar_type;
typedef typename dec_funct_type::sample_type sample_type;
typedef typename dec_funct_type::mem_manager_type mem_manager_type;
typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type;
typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
// make sure requires clause is not broken
DLIB_ASSERT( is_binary_classification_problem(x_test,y_test) == true,
"\tmatrix test_binary_decision_function()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t is_binary_classification_problem(x_test,y_test): "
<< ((is_binary_classification_problem(x_test,y_test))? "true":"false"));
// count the number of positive and negative examples
long num_pos = 0;
long num_neg = 0;
long num_pos_correct = 0;
long num_neg_correct = 0;
// now test this trained object
for (long i = 0; i < x_test.nr(); ++i)
{
// if this is a positive example
if (y_test(i) == +1.0)
{
++num_pos;
if (dec_funct(x_test(i)) >= 0)
++num_pos_correct;
}
else if (y_test(i) == -1.0)
{
++num_neg;
if (dec_funct(x_test(i)) < 0)
++num_neg_correct;
}
else
{
throw dlib::error("invalid input labels to the test_binary_decision_function() function");
}
}
matrix<scalar_type, 1, 2, mem_manager_type> res;
res(0) = (scalar_type)num_pos_correct/(scalar_type)(num_pos);
res(1) = (scalar_type)num_neg_correct/(scalar_type)(num_neg);
return res;
}
template <
typename dec_funct_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const matrix<typename dec_funct_type::scalar_type, 1, 2, typename dec_funct_type::mem_manager_type>
test_binary_decision_function (
const dec_funct_type& dec_funct,
const in_sample_vector_type& x_test,
const in_scalar_vector_type& y_test
)
{
return test_binary_decision_function_impl(dec_funct,
vector_to_matrix(x_test),
vector_to_matrix(y_test));
}
// ----------------------------------------------------------------------------------------
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const matrix<typename trainer_type::scalar_type, 1, 2, typename trainer_type::mem_manager_type>
cross_validate_trainer_impl (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
)
{
typedef typename trainer_type::scalar_type scalar_type;
typedef typename trainer_type::sample_type sample_type;
typedef typename trainer_type::mem_manager_type mem_manager_type;
typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type;
typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
1 < folds && folds <= x.nr(),
"\tmatrix cross_validate_trainer()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t folds: " << folds
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
);
// count the number of positive and negative examples
long num_pos = 0;
long num_neg = 0;
for (long r = 0; r < y.nr(); ++r)
{
if (y(r) == +1.0)
++num_pos;
else
++num_neg;
}
// figure out how many positive and negative examples we will have in each fold
const long num_pos_test_samples = num_pos/folds;
const long num_pos_train_samples = num_pos - num_pos_test_samples;
const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
typename trainer_type::trained_function_type d;
sample_vector_type x_test, x_train;
scalar_vector_type y_test, y_train;
x_test.set_size (num_pos_test_samples + num_neg_test_samples);
y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
long pos_idx = 0;
long neg_idx = 0;
matrix<scalar_type, 1, 2, mem_manager_type> res;
set_all_elements(res,0);
for (long i = 0; i < folds; ++i)
{
long cur = 0;
// load up our positive test samples
while (cur < num_pos_test_samples)
{
if (y(pos_idx) == +1.0)
{
x_test(cur) = x(pos_idx);
y_test(cur) = +1.0;
++cur;
}
pos_idx = (pos_idx+1)%x.nr();
}
// load up our negative test samples
while (cur < x_test.nr())
{
if (y(neg_idx) == -1.0)
{
x_test(cur) = x(neg_idx);
y_test(cur) = -1.0;
++cur;
}
neg_idx = (neg_idx+1)%x.nr();
}
// load the training data from the data following whatever we loaded
// as the testing data
long train_pos_idx = pos_idx;
long train_neg_idx = neg_idx;
cur = 0;
// load up our positive train samples
while (cur < num_pos_train_samples)
{
if (y(train_pos_idx) == +1.0)
{
x_train(cur) = x(train_pos_idx);
y_train(cur) = +1.0;
++cur;
}
train_pos_idx = (train_pos_idx+1)%x.nr();
}
// load up our negative train samples
while (cur < x_train.nr())
{
if (y(train_neg_idx) == -1.0)
{
x_train(cur) = x(train_neg_idx);
y_train(cur) = -1.0;
++cur;
}
train_neg_idx = (train_neg_idx+1)%x.nr();
}
// do the training and testing
res += test_binary_decision_function(trainer.train(x_train,y_train),x_test,y_test);
} // for (long i = 0; i < folds; ++i)
return res/(scalar_type)folds;
}
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const matrix<typename trainer_type::scalar_type, 1, 2, typename trainer_type::mem_manager_type>
cross_validate_trainer (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
)
{
return cross_validate_trainer_impl(trainer,
vector_to_matrix(x),
vector_to_matrix(y),
folds);
}
// ----------------------------------------------------------------------------------------
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const probabilistic_decision_function<typename trainer_type::kernel_type> train_probabilistic_decision_function_impl (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
)
{
typedef typename trainer_type::sample_type sample_type;
typedef typename trainer_type::scalar_type scalar_type;
typedef typename trainer_type::mem_manager_type mem_manager_type;
typedef typename trainer_type::kernel_type K;
/*
This function fits a sigmoid function to the output of the
svm trained by svm_nu_train(). The technique used is the one
described in the paper:
Probabilistic Outputs for Support Vector Machines and
Comparisons to Regularized Likelihood Methods by
John C. Platt. Match 26, 1999
*/
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
1 < folds && folds <= x.nr(),
"\tprobabilistic_decision_function train_probabilistic_decision_function()"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t folds: " << folds
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
);
// count the number of positive and negative examples
long num_pos = 0;
long num_neg = 0;
for (long r = 0; r < y.nr(); ++r)
{
if (y(r) == +1.0)
++num_pos;
else
++num_neg;
}
// figure out how many positive and negative examples we will have in each fold
const long num_pos_test_samples = num_pos/folds;
const long num_pos_train_samples = num_pos - num_pos_test_samples;
const long num_neg_test_samples = num_neg/folds;
const long num_neg_train_samples = num_neg - num_neg_test_samples;
decision_function<K> d;
typename decision_function<K>::sample_vector_type x_test, x_train;
typename decision_function<K>::scalar_vector_type y_test, y_train;
x_test.set_size (num_pos_test_samples + num_neg_test_samples);
y_test.set_size (num_pos_test_samples + num_neg_test_samples);
x_train.set_size(num_pos_train_samples + num_neg_train_samples);
y_train.set_size(num_pos_train_samples + num_neg_train_samples);
typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type_vector;
typedef std::vector<scalar_type, alloc_scalar_type_vector > dvector;
typedef std_allocator<int, mem_manager_type> alloc_int_vector;
typedef std::vector<int, alloc_int_vector > ivector;
dvector out;
ivector target;
long pos_idx = 0;
long neg_idx = 0;
for (long i = 0; i < folds; ++i)
{
long cur = 0;
// load up our positive test samples
while (cur < num_pos_test_samples)
{
if (y(pos_idx) == +1.0)
{
x_test(cur) = x(pos_idx);
y_test(cur) = +1.0;
++cur;
}
pos_idx = (pos_idx+1)%x.nr();
}
// load up our negative test samples
while (cur < x_test.nr())
{
if (y(neg_idx) == -1.0)
{
x_test(cur) = x(neg_idx);
y_test(cur) = -1.0;
++cur;
}
neg_idx = (neg_idx+1)%x.nr();
}
// load the training data from the data following whatever we loaded
// as the testing data
long train_pos_idx = pos_idx;
long train_neg_idx = neg_idx;
cur = 0;
// load up our positive train samples
while (cur < num_pos_train_samples)
{
if (y(train_pos_idx) == +1.0)
{
x_train(cur) = x(train_pos_idx);
y_train(cur) = +1.0;
++cur;
}
train_pos_idx = (train_pos_idx+1)%x.nr();
}
// load up our negative train samples
while (cur < x_train.nr())
{
if (y(train_neg_idx) == -1.0)
{
x_train(cur) = x(train_neg_idx);
y_train(cur) = -1.0;
++cur;
}
train_neg_idx = (train_neg_idx+1)%x.nr();
}
// do the training
d = trainer.train (x_train,y_train);
// now test this fold
for (long i = 0; i < x_test.nr(); ++i)
{
out.push_back(d(x_test(i)));
// if this was a positive example
if (y_test(i) == +1.0)
{
target.push_back(1);
}
else if (y_test(i) == -1.0)
{
target.push_back(0);
}
else
{
throw dlib::error("invalid input labels to the train_probabilistic_decision_function() function");
}
}
} // for (long i = 0; i < folds; ++i)
// Now find the parameters of the sigmoid. Do so using the method from the
// above referenced paper.
scalar_type prior0 = num_pos_test_samples*folds;
scalar_type prior1 = num_neg_test_samples*folds;
scalar_type A = 0;
scalar_type B = std::log((prior0+1)/(prior1+1));
const scalar_type hiTarget = (prior1+1)/(prior1+2);
const scalar_type loTarget = 1.0/(prior0+2);
scalar_type lambda = 1e-3;
scalar_type olderr = std::numeric_limits<scalar_type>::max();;
dvector pp(out.size(),(prior1+1)/(prior1+prior0+2));
const scalar_type min_log = -200.0;
scalar_type t = 0;
int count = 0;
for (int it = 0; it < 100; ++it)
{
scalar_type a = 0;
scalar_type b = 0;
scalar_type c = 0;
scalar_type d = 0;
scalar_type e = 0;
// First, compute Hessian & gradient of error function with
// respect to A & B
for (unsigned long i = 0; i < out.size(); ++i)
{
if (target[i])
t = hiTarget;
else
t = loTarget;
const scalar_type d1 = pp[i] - t;
const scalar_type d2 = pp[i]*(1-pp[i]);
a += out[i]*out[i]*d2;
b += d2;
c += out[i]*d2;
d += out[i]*d1;
e += d1;
}
// If gradient is really tiny, then stop.
if (std::abs(d) < 1e-9 && std::abs(e) < 1e-9)
break;
scalar_type oldA = A;
scalar_type oldB = B;
scalar_type err = 0;
// Loop until goodness of fit increases
while (true)
{
scalar_type det = (a+lambda)*(b+lambda)-c*c;
// if determinant of Hessian is really close to zero then increase stabilizer.
if (std::abs(det) <= std::numeric_limits<scalar_type>::epsilon())
{
lambda *= 10;
continue;
}
A = oldA + ((b+lambda)*d-c*e)/det;
B = oldB + ((a+lambda)*e-c*d)/det;
// Now, compute the goodness of fit
err = 0;
for (unsigned long i = 0; i < out.size(); ++i)
{
if (target[i])
t = hiTarget;
else
t = loTarget;
scalar_type p = 1.0/(1.0+std::exp(out[i]*A+B));
pp[i] = p;
// At this step, make sure log(0) returns min_log
err -= t*std::max(std::log(p),min_log) + (1-t)*std::max(std::log(1-p),min_log);
}
if (err < olderr*(1+1e-7))
{
lambda *= 0.1;
break;
}
// error did not decrease: increase stabilizer by factor of 10
// & try again
lambda *= 10;
if (lambda >= 1e6) // something is broken. Give up
break;
}
scalar_type diff = err-olderr;
scalar_type scale = 0.5*(err+olderr+1.0);
if (diff > -1e-3*scale && diff < 1e-7*scale)
++count;
else
count = 0;
olderr = err;
if (count == 3)
break;
}
return probabilistic_decision_function<K>( A, B, trainer.train(x,y) );
}
template <
typename trainer_type,
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const probabilistic_decision_function<typename trainer_type::kernel_type> train_probabilistic_decision_function (
const trainer_type& trainer,
const in_sample_vector_type& x,
const in_scalar_vector_type& y,
const long folds
)
{
return train_probabilistic_decision_function_impl(trainer,
vector_to_matrix(x),
vector_to_matrix(y),
folds);
}
// ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
typename enable_if<is_matrix<T>,void>::type randomize_samples (
T& t,
U& u
)
{
rand::kernel_1a r;
long n = t.nr()-1;
while (n > 0)
{
// put a random integer into idx
unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t(idx), t(n));
exchange(u(idx), u(n));
--n;
}
}
// ----------------------------------------------------------------------------------------
template <
typename T,
typename U
>
typename disable_if<is_matrix<T>,void>::type randomize_samples (
T& t,
U& u
)
{
rand::kernel_1a r;
long n = t.size()-1;
while (n > 0)
{
// put a random integer into idx
unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t[idx], t[n]);
exchange(u[idx], u[n]);
--n;
}
}
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename enable_if<is_matrix<T>,void>::type randomize_samples (
T& t
)
{
rand::kernel_1a r;
long n = t.nr()-1;
while (n > 0)
{
// put a random integer into idx
unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t(idx), t(n));
--n;
}
}
// ----------------------------------------------------------------------------------------
template <
typename T
>
typename disable_if<is_matrix<T>,void>::type randomize_samples (
T& t
)
{
rand::kernel_1a r;
long n = t.size()-1;
while (n > 0)
{
// put a random integer into idx
unsigned long idx = r.get_random_32bit_number();
// make idx be less than n
idx %= n;
// swap our randomly selected index into the n position
exchange(t[idx], t[n]);
--n;
}
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <
typename K
>
class svm_nu_trainer
{
public:
typedef K kernel_type;
typedef typename kernel_type::scalar_type scalar_type;
typedef typename kernel_type::sample_type sample_type;
typedef typename kernel_type::mem_manager_type mem_manager_type;
typedef decision_function<kernel_type> trained_function_type;
svm_nu_trainer (
) :
nu(0.1),
cache_size(200),
eps(0.001)
{
}
svm_nu_trainer (
const kernel_type& kernel_,
const scalar_type& nu_
) :
kernel_function(kernel_),
nu(nu_),
cache_size(200),
eps(0.001)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < nu && nu <= 1,
"\tsvm_nu_trainer::svm_nu_trainer(kernel,nu)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t nu: " << nu
);
}
void set_cache_size (
long cache_size_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(cache_size_ > 0,
"\tvoid svm_nu_trainer::set_cache_size(cache_size_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t cache_size: " << cache_size_
);
cache_size = cache_size_;
}
const long get_cache_size (
) const
{
return cache_size;
}
void set_epsilon (
scalar_type eps_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(eps_ > 0,
"\tvoid svm_nu_trainer::set_epsilon(eps_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t eps: " << eps_
);
eps = eps_;
}
const scalar_type get_epsilon (
) const
{
return eps;
}
void set_kernel (
const kernel_type& k
)
{
kernel_function = k;
}
const kernel_type& get_kernel (
) const
{
return kernel_function;
}
void set_nu (
scalar_type nu_
)
{
// make sure requires clause is not broken
DLIB_ASSERT(0 < nu_ && nu_ <= 1,
"\tvoid svm_nu_trainer::set_nu(nu_)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t nu: " << nu_
);
nu = nu_;
}
const scalar_type get_nu (
) const
{
return nu;
}
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
return do_train(vector_to_matrix(x), vector_to_matrix(y));
}
void swap (
svm_nu_trainer& item
)
{
exchange(kernel_function, item.kernel_function);
exchange(nu, item.nu);
exchange(cache_size, item.cache_size);
exchange(eps, item.eps);
}
private:
// ------------------------------------------------------------------------------------
template <
typename in_sample_vector_type,
typename in_scalar_vector_type
>
const decision_function<kernel_type> do_train (
const in_sample_vector_type& x,
const in_scalar_vector_type& y
) const
{
typedef typename K::scalar_type scalar_type;
typedef typename decision_function<K>::sample_vector_type sample_vector_type;
typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;
// make sure requires clause is not broken
DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
"\tdecision_function svm_nu_trainer::train(x,y)"
<< "\n\t invalid inputs were given to this function"
<< "\n\t x.nr(): " << x.nr()
<< "\n\t y.nr(): " << y.nr()
<< "\n\t x.nc(): " << x.nc()
<< "\n\t y.nc(): " << y.nc()
<< "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
);
const scalar_type tau = 1e-12;
scalar_vector_type df; // delta f(alpha)
scalar_vector_type alpha;
kernel_matrix_cache<K, in_sample_vector_type, in_scalar_vector_type> Q(x,y,kernel_function,cache_size);
alpha.set_size(x.nr());
df.set_size(x.nr());
// now initialize alpha
set_initial_alpha(y, nu, alpha);
// initialize df. Compute df = Q*alpha
for (long r = 0; r < df.nr(); ++r)
{
df(r) = 0;
for (long c = 0; c < alpha.nr(); ++c)
{
if (alpha(c) != 0)
df(r) += Q(c,r)*alpha(c);
}
}
// now perform the actual optimization of alpha
long i, j;
while (find_working_group(y,alpha,Q,df,tau,eps,i,j))
{
const scalar_type old_alpha_i = alpha(i);
const scalar_type old_alpha_j = alpha(j);
optimize_working_pair(y,alpha,Q,df,tau,i,j);
// update the df vector now that we have modified alpha(i) and alpha(j)
scalar_type delta_alpha_i = alpha(i) - old_alpha_i;
scalar_type delta_alpha_j = alpha(j) - old_alpha_j;
for(long k = 0; k < df.nr(); ++k)
df(k) += Q(k,i)*delta_alpha_i + Q(k,j)*delta_alpha_j;
}
scalar_type rho, b;
calculate_rho_and_b(y,alpha,df,rho,b);
alpha = pointwise_multiply(alpha,y)/rho;
// count the number of support vectors
long sv_count = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (alpha(i) != 0)
++sv_count;
}
scalar_vector_type sv_alpha;
sample_vector_type support_vectors;
// size these column vectors so that they have an entry for each support vector
sv_alpha.set_size(sv_count);
support_vectors.set_size(sv_count);
// load the support vectors and their alpha values into these new column matrices
long idx = 0;
for (long i = 0; i < alpha.nr(); ++i)
{
if (alpha(i) != 0)
{
sv_alpha(idx) = alpha(i);
support_vectors(idx) = x(i);
++idx;
}
}
// now return the decision function
return decision_function<K> (sv_alpha, b, kernel_function, support_vectors);
}
// ------------------------------------------------------------------------------------
template <
typename scalar_type,
typename scalar_vector_type,
typename scalar_vector_type2
>
inline void set_initial_alpha (
const scalar_vector_type& y,
const scalar_type nu,
scalar_vector_type2& alpha
) const
{
set_all_elements(alpha,0);
const scalar_type l = y.nr();
scalar_type temp = nu*l/2;
long num = (long)std::floor(temp);
long num_total = (long)std::ceil(temp);
int count = 0;
for (int i = 0; i < alpha.nr(); ++i)
{
if (y(i) == 1)
{
if (count < num)
{
++count;
alpha(i) = 1;
}
else
{
if (temp > num)
{
++count;
<