pktools  2.6.3
Processing Kernel for geospatial data
pkfsann.cc
1 /**********************************************************************
2 pkfsann.cc: feature selection for artificial neural network classifier pkann
3 Copyright (C) 2008-2014 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <stdlib.h>
21 #include <vector>
22 #include <string>
23 #include <map>
24 #include <algorithm>
25 #include "base/Optionpk.h"
26 #include "imageclasses/ImgReaderOgr.h"
27 #include "algorithms/ConfusionMatrix.h"
28 #include "algorithms/CostFactory.h"
29 #include "algorithms/FeatureSelector.h"
30 #include "floatfann.h"
31 #include "algorithms/myfann_cpp.h"
32 #include "pkfsann.h"
33 
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>
36 #endif
37 
38 /******************************************************************************/
94 using namespace std;
95 
96 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
97 
//Constructor: configure the ANN cost factory with the network topology
//(nneuron: neurons per hidden layer), connection rate, initial weights,
//learning rate, maximum number of training epochs, the number of
//cross-validation folds (cv, 0 disables cross validation) and verbosity.
CostFactoryANN::CostFactoryANN(const vector<unsigned int>& nneuron, float connection, const std::vector<float> weights, float learning, unsigned int maxit, unsigned short cv, bool verbose)
 : CostFactory(cv,verbose), m_nneuron(nneuron), m_connection(connection), m_weights(weights), m_learning(learning), m_maxit(maxit){};
100 
//Destructor: no resources owned beyond the members, nothing to release.
CostFactoryANN::~CostFactoryANN(){
}
103 
104 double CostFactoryANN::getCost(const vector<Vector2d<float> > &trainingFeatures)
105 {
106  unsigned short nclass=trainingFeatures.size();
107  unsigned int ntraining=0;
108  unsigned int ntest=0;
109  for(int iclass=0;iclass<nclass;++iclass){
110  ntraining+=m_nctraining[iclass];
111  ntest+=m_nctest[iclass];
112  }
113  if(ntest)
114  assert(!m_cv);
115  if(!m_cv)
116  assert(ntest);
117  unsigned short nFeatures=trainingFeatures[0][0].size();
118 
119  FANN::neural_net net;//the neural network
120  const unsigned int num_layers = m_nneuron.size()+2;
121  const float desired_error = 0.0003;
122  const unsigned int iterations_between_reports = (m_verbose) ? m_maxit+1:0;
123  if(m_verbose>1){
124  cout << "creating artificial neural network with " << m_nneuron.size() << " hidden layer, having " << endl;
125  for(int ilayer=0;ilayer<m_nneuron.size();++ilayer)
126  cout << m_nneuron[ilayer] << " ";
127  cout << "neurons" << endl;
128  }
129  switch(num_layers){
130  case(3):{
131  unsigned int layers[3];
132  layers[0]=nFeatures;
133  layers[1]=m_nneuron[0];
134  layers[2]=nclass;
135  net.create_sparse_array(m_connection,num_layers,layers);
136  break;
137  }
138  case(4):{
139  unsigned int layers[4];
140  layers[0]=nFeatures;
141  layers[1]=m_nneuron[0];
142  layers[2]=m_nneuron[1];
143  layers[3]=nclass;
144  net.create_sparse_array(m_connection,num_layers,layers);
145  break;
146  }
147  default:
148  cerr << "Only 1 or 2 hidden layers are supported!" << endl;
149  exit(1);
150  break;
151  }
152 
153  net.set_learning_rate(m_learning);
154 
155  net.set_activation_function_hidden(FANN::SIGMOID_SYMMETRIC_STEPWISE);
156  net.set_activation_function_output(FANN::SIGMOID_SYMMETRIC_STEPWISE);
157 
158  vector<unsigned short> referenceVector;
159  vector<unsigned short> outputVector;
160  float rmse=0;
161  vector<Vector2d<float> > tmpFeatures(nclass);
162  for(int iclass=0;iclass<nclass;++iclass){
163  tmpFeatures[iclass].resize(trainingFeatures[iclass].size(),nFeatures);
164  for(unsigned int isample=0;isample<m_nctraining[iclass];++isample){
165  for(int ifeature=0;ifeature<nFeatures;++ifeature){
166  tmpFeatures[iclass][isample][ifeature]=trainingFeatures[iclass][isample][ifeature];
167  }
168  }
169  }
170  m_cm.clearResults();
171  if(m_cv>0){
172  rmse=net.cross_validation(tmpFeatures,
173  ntraining,
174  m_cv,
175  m_maxit,
176  desired_error,
177  referenceVector,
178  outputVector,
179  m_verbose);
180  for(int isample=0;isample<referenceVector.size();++isample){
181  string refClassName=m_nameVector[referenceVector[isample]];
182  string className=m_nameVector[outputVector[isample]];
183  if(m_classValueMap.size())
184  m_cm.incrementResult(type2string<short>(m_classValueMap[refClassName]),type2string<short>(m_classValueMap[className]),1.0);
185  else
186  m_cm.incrementResult(m_cm.getClass(referenceVector[isample]),m_cm.getClass(outputVector[isample]),1.0);
187  }
188  }
189  else{//not working yet. please repair...
190  assert(m_cv>0);
191  bool initWeights=true;
192  net.train_on_data(tmpFeatures,ntraining,initWeights, m_maxit,
193  iterations_between_reports, desired_error);
194  vector<Vector2d<float> > testFeatures(nclass);
195  vector<float> result(nclass);
196  int maxClass=-1;
197  for(int iclass=0;iclass<nclass;++iclass){
198  testFeatures.resize(m_nctest[iclass],nFeatures);
199  for(unsigned int isample=0;isample<m_nctraining[iclass];++isample){
200  for(int ifeature=0;ifeature<nFeatures;++ifeature){
201  testFeatures[iclass][isample][ifeature]=trainingFeatures[iclass][m_nctraining[iclass]+isample][ifeature];
202  }
203  result=net.run(testFeatures[iclass][isample]);
204  string refClassName=m_nameVector[iclass];
205  float maxP=-1;
206  for(int ic=0;ic<nclass;++ic){
207  float pv=(result[ic]+1.0)/2.0;//bring back to scale [0,1]
208  if(pv>maxP){
209  maxP=pv;
210  maxClass=ic;
211  }
212  }
213  string className=m_nameVector[maxClass];
214  if(m_classValueMap.size())
215  m_cm.incrementResult(type2string<short>(m_classValueMap[refClassName]),type2string<short>(m_classValueMap[className]),1.0);
216  else
217  m_cm.incrementResult(m_cm.getClass(referenceVector[isample]),m_cm.getClass(outputVector[isample]),1.0);
218  }
219  }
220  }
221  assert(m_cm.nReference());
222  return(m_cm.kappa());
223 }
224 
225 int main(int argc, char *argv[])
226 {
227  // vector<double> priors;
228 
229  //--------------------------- command line options ------------------------------------
230  Optionpk<string> input_opt("i", "input", "input test set (leave empty to perform a cross validation based on training only)");
231  Optionpk<string> training_opt("t", "training", "training vector file. A single vector file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
232  Optionpk<string> tlayer_opt("tln", "tln", "training layer name(s)");
233  Optionpk<string> label_opt("label", "label", "identifier for class label in training vector file.","label");
234  Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
235  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
236  Optionpk<bool> random_opt("random","random", "in case of balance, randomize input data", true);
237  Optionpk<int> minSize_opt("min", "min", "if number of training pixels is less then min, do not take this class into account", 0);
238  Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
239  Optionpk<double> bstart_opt("s", "start", "start band sequence number",0);
240  Optionpk<double> bend_opt("e", "end", "end band sequence number (set to 0 to include all bands)", 0);
241  Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
242  Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
243  Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (0: no aggregation, 1: sum rule, 2: max rule).",0);
244  // Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0);
245  Optionpk<string> selector_opt("sm", "sm", "feature selection method (sffs=sequential floating forward search,sfs=sequential forward search, sbs, sequential backward search ,bfs=brute force search)","sffs");
246  Optionpk<float> epsilon_cost_opt("ecost", "ecost", "epsilon for stopping criterion in cost function to determine optimal number of features",0.001);
247  Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",2);
248  Optionpk<string> classname_opt("c", "class", "list of class names.");
249  Optionpk<short> classvalue_opt("r", "reclass", "list of class values (use same order as in classname opt.");
250  Optionpk<unsigned int> nneuron_opt("n", "nneuron", "number of neurons in hidden layers in neural network (multiple hidden layers are set by defining multiple number of neurons: -n 15 -n 1, default is one hidden layer with 5 neurons)", 5);
251  Optionpk<float> connection_opt("\0", "connection", "connection reate (default: 1.0 for a fully connected network)", 1.0);
252  Optionpk<float> weights_opt("w", "weights", "weights for neural network. Apply to fully connected network only, starting from first input neuron to last output neuron, including the bias neurons (last neuron in each but last layer)", 0.0);
253  Optionpk<float> learning_opt("l", "learning", "learning rate (default: 0.7)", 0.7);
254  Optionpk<unsigned int> maxit_opt("\0", "maxit", "number of maximum iterations (epoch) (default: 500)", 500);
255  Optionpk<short> verbose_opt("v", "verbose", "set to: 0 (results only), 1 (confusion matrix), 2 (debug)",0,2);
256 
257  tlayer_opt.setHide(1);
258  label_opt.setHide(1);
259  balance_opt.setHide(1);
260  random_opt.setHide(1);
261  minSize_opt.setHide(1);
262  band_opt.setHide(1);
263  bstart_opt.setHide(1);
264  bend_opt.setHide(1);
265  offset_opt.setHide(1);
266  scale_opt.setHide(1);
267  aggreg_opt.setHide(1);
268  // priors_opt.setHide(1);
269  selector_opt.setHide(1);
270  epsilon_cost_opt.setHide(1);
271  cv_opt.setHide(1);
272  classname_opt.setHide(1);
273  classvalue_opt.setHide(1);
274  nneuron_opt.setHide(1);
275  connection_opt.setHide(1);
276  weights_opt.setHide(1);
277  learning_opt.setHide(1);
278  maxit_opt.setHide(1);
279 
280  bool doProcess;//stop process when program was invoked with help option (-h --help)
281  try{
282  doProcess=input_opt.retrieveOption(argc,argv);
283  training_opt.retrieveOption(argc,argv);
284  maxFeatures_opt.retrieveOption(argc,argv);
285  tlayer_opt.retrieveOption(argc,argv);
286  label_opt.retrieveOption(argc,argv);
287  balance_opt.retrieveOption(argc,argv);
288  random_opt.retrieveOption(argc,argv);
289  minSize_opt.retrieveOption(argc,argv);
290  band_opt.retrieveOption(argc,argv);
291  bstart_opt.retrieveOption(argc,argv);
292  bend_opt.retrieveOption(argc,argv);
293  offset_opt.retrieveOption(argc,argv);
294  scale_opt.retrieveOption(argc,argv);
295  aggreg_opt.retrieveOption(argc,argv);
296  // priors_opt.retrieveOption(argc,argv);
297  selector_opt.retrieveOption(argc,argv);
298  epsilon_cost_opt.retrieveOption(argc,argv);
299  cv_opt.retrieveOption(argc,argv);
300  classname_opt.retrieveOption(argc,argv);
301  classvalue_opt.retrieveOption(argc,argv);
302  nneuron_opt.retrieveOption(argc,argv);
303  connection_opt.retrieveOption(argc,argv);
304  weights_opt.retrieveOption(argc,argv);
305  learning_opt.retrieveOption(argc,argv);
306  maxit_opt.retrieveOption(argc,argv);
307  verbose_opt.retrieveOption(argc,argv);
308  }
309  catch(string predefinedString){
310  std::cout << predefinedString << std::endl;
311  exit(0);
312  }
313  if(!doProcess){
314  cout << endl;
315  cout << "Usage: pkfsann -t training -n number" << endl;
316  cout << endl;
317  std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
318  exit(0);//help was invoked, stop processing
319  }
320 
321  CostFactoryANN costfactory(nneuron_opt, connection_opt[0], weights_opt, learning_opt[0], maxit_opt[0], cv_opt[0], verbose_opt[0]);
322 
323  assert(training_opt.size());
324  if(input_opt.size())
325  costfactory.setCv(0);
326  if(verbose_opt[0]>=1){
327  if(input_opt.size())
328  std::cout << "input filename: " << input_opt[0] << std::endl;
329  std::cout << "training vector file: " << std::endl;
330  for(int ifile=0;ifile<training_opt.size();++ifile)
331  std::cout << training_opt[ifile] << std::endl;
332  std::cout << "verbose: " << verbose_opt[0] << std::endl;
333  }
334 
335  static std::map<std::string, SelectorValue> selMap;
336  //initialize selMap
337  selMap["sffs"]=SFFS;
338  selMap["sfs"]=SFS;
339  selMap["sbs"]=SBS;
340  selMap["bfs"]=BFS;
341 
342  assert(training_opt.size());
343  if(input_opt.size())
344  cv_opt[0]=0;
345  if(verbose_opt[0]>=1)
346  std::cout << "training vector file: " << training_opt[0] << std::endl;
347 
348  unsigned int totalSamples=0;
349  unsigned int totalTestSamples=0;
350 
351  unsigned short nclass=0;
352  int nband=0;
353  int startBand=2;//first two bands represent X and Y pos
354 
355  // if(priors_opt.size()>1){//priors from argument list
356  // priors.resize(priors_opt.size());
357  // double normPrior=0;
358  // for(int iclass=0;iclass<priors_opt.size();++iclass){
359  // priors[iclass]=priors_opt[iclass];
360  // normPrior+=priors[iclass];
361  // }
362  // //normalize
363  // for(int iclass=0;iclass<priors_opt.size();++iclass)
364  // priors[iclass]/=normPrior;
365  // }
366 
367  //sort bands
368  if(band_opt.size())
369  std::sort(band_opt.begin(),band_opt.end());
370 
371  // map<string,short> classValueMap;//global variable for now (due to getCost)
372  if(classname_opt.size()){
373  assert(classname_opt.size()==classvalue_opt.size());
374  for(int iclass=0;iclass<classname_opt.size();++iclass)
375  costfactory.setClassValueMap(classname_opt[iclass],classvalue_opt[iclass]);
376  }
377  //----------------------------------- Training -------------------------------
378  vector<double> offset;
379  vector<double> scale;
380  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
381  vector< Vector2d<float> > testPixels;//[class][sample][band]
382  map<string,Vector2d<float> > trainingMap;
383  map<string,Vector2d<float> > testMap;
384  vector<string> fields;
385 
386  //organize training data
387  trainingPixels.clear();
388  if(verbose_opt[0]>=1)
389  std::cout << "reading imageVector file " << training_opt[0] << std::endl;
390  try{
391  ImgReaderOgr trainingReader(training_opt[0]);
392  if(band_opt.size()){
393  totalSamples=trainingReader.readDataImageOgr(trainingMap,fields,band_opt,label_opt[0],tlayer_opt,verbose_opt[0]);
394  if(input_opt.size()){
395  ImgReaderOgr inputReader(input_opt[0]);
396  totalTestSamples=trainingReader.readDataImageOgr(testMap,fields,band_opt,label_opt[0],tlayer_opt,verbose_opt[0]);
397  inputReader.close();
398  }
399  }
400  else{
401  totalSamples=trainingReader.readDataImageOgr(trainingMap,fields,bstart_opt[0],bend_opt[0],label_opt[0],tlayer_opt,verbose_opt[0]);
402  if(input_opt.size()){
403  ImgReaderOgr inputReader(input_opt[0]);
404  totalTestSamples=trainingReader.readDataImageOgr(testMap,fields,bstart_opt[0],bend_opt[0],label_opt[0],tlayer_opt,verbose_opt[0]);
405  inputReader.close();
406  }
407  }
408  if(trainingMap.size()<2){
409  string errorstring="Error: could not read at least two classes from training file";
410  throw(errorstring);
411  }
412  if(input_opt.size()&&testMap.size()<2){
413  string errorstring="Error: could not read at least two classes from test input file";
414  throw(errorstring);
415  }
416  trainingReader.close();
417  }
418  catch(string error){
419  cerr << error << std::endl;
420  exit(1);
421  }
422  catch(std::exception& e){
423  std::cerr << "Error: ";
424  std::cerr << e.what() << std::endl;
425  std::cerr << CPLGetLastErrorMsg() << std::endl;
426  exit(1);
427  }
428  catch(...){
429  cerr << "error catched" << std::endl;
430  exit(1);
431  }
432  //delete class 0 ?
433  // if(verbose_opt[0]>=1)
434  // std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
435  // totalSamples-=trainingMap[0].size();
436  // trainingMap.erase(0);
437  //convert map to vector
438 
439  if(verbose_opt[0]>1)
440  std::cout << "training pixels: " << std::endl;
441  map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
442  while(mapit!=trainingMap.end()){
443  // if(classValueMap.size()){
444  // //check if name in training is covered by classname_opt (values can not be 0)
445  // if(classValueMap[mapit->first]>0){
446  // if(verbose_opt[0])
447  // std::cout << mapit->first << " -> " << classValueMap[mapit->first] << std::endl;
448  // }
449  // else{
450  // std::cerr << "Error: names in classname option are not complete, please check names in training vector and make sure classvalue is > 0" << std::endl;
451  // exit(1);
452  // }
453  // }
454  //delete small classes
455  if((mapit->second).size()<minSize_opt[0]){
456  trainingMap.erase(mapit);
457  continue;
458  }
459  trainingPixels.push_back(mapit->second);
460  if(verbose_opt[0]>1)
461  std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
462  ++mapit;
463  }
464  nclass=trainingPixels.size();
465  if(classname_opt.size())
466  assert(nclass==classname_opt.size());
467  nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
468 
469  mapit=testMap.begin();
470  while(mapit!=testMap.end()){
471  //no need to delete small classes for test sample
472  testPixels.push_back(mapit->second);
473  if(verbose_opt[0]>1)
474  std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
475  ++mapit;
476  }
477  if(input_opt.size()){
478  assert(nclass==testPixels.size());
479  assert(nband=testPixels[0][0].size()-2);//X and Y//testPixels[0][0].size();
480  assert(!cv_opt[0]);
481  }
482 
483  //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
484  //balance training data
485  if(balance_opt[0]>0){
486  if(random_opt[0])
487  srand(time(NULL));
488  totalSamples=0;
489  for(int iclass=0;iclass<nclass;++iclass){
490  if(trainingPixels[iclass].size()>balance_opt[0]){
491  while(trainingPixels[iclass].size()>balance_opt[0]){
492  int index=rand()%trainingPixels[iclass].size();
493  trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
494  }
495  }
496  else{
497  int oldsize=trainingPixels[iclass].size();
498  for(int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
499  int index = rand()%oldsize;
500  trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
501  }
502  }
503  totalSamples+=trainingPixels[iclass].size();
504  }
505  assert(totalSamples==nclass*balance_opt[0]);
506  }
507 
508  //set scale and offset
509  offset.resize(nband);
510  scale.resize(nband);
511  if(offset_opt.size()>1)
512  assert(offset_opt.size()==nband);
513  if(scale_opt.size()>1)
514  assert(scale_opt.size()==nband);
515  for(int iband=0;iband<nband;++iband){
516  if(verbose_opt[0]>1)
517  std::cout << "scaling for band" << iband << std::endl;
518  offset[iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
519  scale[iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
520  //search for min and maximum
521  if(scale[iband]<=0){
522  float theMin=trainingPixels[0][0][iband+startBand];
523  float theMax=trainingPixels[0][0][iband+startBand];
524  for(int iclass=0;iclass<nclass;++iclass){
525  for(int isample=0;isample<trainingPixels[iclass].size();++isample){
526  if(theMin>trainingPixels[iclass][isample][iband+startBand])
527  theMin=trainingPixels[iclass][isample][iband+startBand];
528  if(theMax<trainingPixels[iclass][isample][iband+startBand])
529  theMax=trainingPixels[iclass][isample][iband+startBand];
530  }
531  }
532  offset[iband]=theMin+(theMax-theMin)/2.0;
533  scale[iband]=(theMax-theMin)/2.0;
534  if(verbose_opt[0]>1){
535  std::cout << "Extreme image values for band " << iband << ": [" << theMin << "," << theMax << "]" << std::endl;
536  std::cout << "Using offset, scale: " << offset[iband] << ", " << scale[iband] << std::endl;
537  std::cout << "scaled values for band " << iband << ": [" << (theMin-offset[iband])/scale[iband] << "," << (theMax-offset[iband])/scale[iband] << "]" << std::endl;
538  }
539  }
540  }
541 
542  // if(priors_opt.size()==1){//default: equal priors for each class
543  // priors.resize(nclass);
544  // for(int iclass=0;iclass<nclass;++iclass)
545  // priors[iclass]=1.0/nclass;
546  // }
547  // assert(priors_opt.size()==1||priors_opt.size()==nclass);
548 
549  if(verbose_opt[0]>=1){
550  std::cout << "number of bands: " << nband << std::endl;
551  std::cout << "number of classes: " << nclass << std::endl;
552  // std::cout << "priors:";
553  // for(int iclass=0;iclass<nclass;++iclass)
554  // std::cout << " " << priors[iclass];
555  // std::cout << std::endl;
556  }
557 
558  //set names in confusion matrix using nameVector
559  vector<string> nameVector=costfactory.getNameVector();
560  for(int iname=0;iname<nameVector.size();++iname){
561  if(costfactory.getClassValueMap().empty())
562  costfactory.pushBackClassName(nameVector[iname]);
563  // cm.pushBackClassName(nameVector[iname]);
564  else if(costfactory.getClassIndex(type2string<short>((costfactory.getClassValueMap())[nameVector[iname]]))<0)
565  costfactory.pushBackClassName(type2string<short>((costfactory.getClassValueMap())[nameVector[iname]]));
566  }
567 
568  // if(classname_opt.empty()){
569  // for(int iclass=0;iclass<nclass;++iclass){
570  // if(verbose_opt[0])
571  // std::cout << iclass << " " << cm.getClass(iclass) << " -> " << string2type<short>(cm.getClass(iclass)) << std::endl;
572  // classValueMap[cm.getClass(iclass)]=string2type<short>(cm.getClass(iclass));
573  // }
574  // }
575 
576  //Calculate features of trainig set
577 
578  vector<unsigned int> nctraining;
579  vector<unsigned int> nctest;
580  nctraining.resize(nclass);
581  nctest.resize(nclass);
582  vector< Vector2d<float> > trainingFeatures(nclass);
583  for(int iclass=0;iclass<nclass;++iclass){
584  if(verbose_opt[0]>=1)
585  std::cout << "calculating features for class " << iclass << std::endl;
586  nctraining[iclass]=trainingPixels[iclass].size();
587  if(verbose_opt[0]>=1)
588  std::cout << "nctraining[" << iclass << "]: " << nctraining[iclass] << std::endl;
589  if(testPixels.size()>iclass){
590  nctest[iclass]=testPixels[iclass].size();
591  if(verbose_opt[0]>=1){
592  std::cout << "nctest[" << iclass << "]: " << nctest[iclass] << std::endl;
593  }
594  }
595  else
596  nctest[iclass]=0;
597 
598  trainingFeatures[iclass].resize(nctraining[iclass]+nctest[iclass]);
599  for(int isample=0;isample<nctraining[iclass];++isample){
600  //scale pixel values according to scale and offset!!!
601  for(int iband=0;iband<nband;++iband){
602  assert(trainingPixels[iclass].size()>isample);
603  assert(trainingPixels[iclass][isample].size()>iband+startBand);
604  assert(offset.size()>iband);
605  assert(scale.size()>iband);
606  float value=trainingPixels[iclass][isample][iband+startBand];
607  trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
608  }
609  }
610  for(int isample=0;isample<nctest[iclass];++isample){
611  //scale pixel values according to scale and offset!!!
612  for(int iband=0;iband<nband;++iband){
613  assert(testPixels[iclass].size()>isample);
614  assert(testPixels[iclass][isample].size()>iband+startBand);
615  assert(offset.size()>iband);
616  assert(scale.size()>iband);
617  float value=testPixels[iclass][isample][iband+startBand];
618  // testFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
619  trainingFeatures[iclass][nctraining[iclass]+isample].push_back((value-offset[iband])/scale[iband]);
620  }
621  }
622  assert(trainingFeatures[iclass].size()==nctraining[iclass]+nctest[iclass]);
623  }
624 
625  costfactory.setNcTraining(nctraining);
626  costfactory.setNcTest(nctest);
627  int nFeatures=trainingFeatures[0][0].size();
628  int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : 1;
629  double previousCost=-1;
630  double cost=0;
631  list<int> subset;//set of selected features (levels) for each class combination
632  FeatureSelector selector;
633  try{
634  if(maxFeatures>=nFeatures){
635  subset.clear();
636  for(int ifeature=0;ifeature<nFeatures;++ifeature)
637  subset.push_back(ifeature);
638  cost=costfactory.getCost(trainingFeatures);
639  }
640  else{
641  while(fabs(cost-previousCost)>=epsilon_cost_opt[0]){
642  previousCost=cost;
643  switch(selMap[selector_opt[0]]){
644  case(SFFS):
645  subset.clear();//needed to clear in case of floating and brute force search
646  cost=selector.floating(trainingFeatures,costfactory,subset,maxFeatures,epsilon_cost_opt[0],verbose_opt[0]);
647  break;
648  case(SFS):
649  cost=selector.forward(trainingFeatures,costfactory,subset,maxFeatures,verbose_opt[0]);
650  break;
651  case(SBS):
652  cost=selector.backward(trainingFeatures,costfactory,subset,maxFeatures,verbose_opt[0]);
653  break;
654  case(BFS):
655  subset.clear();//needed to clear in case of floating and brute force search
656  cost=selector.bruteForce(trainingFeatures,costfactory,subset,maxFeatures,verbose_opt[0]);
657  break;
658  default:
659  std::cout << "Error: selector not supported, please use sffs, sfs, sbs or bfs" << std::endl;
660  exit(1);
661  break;
662  }
663  if(verbose_opt[0]>1){
664  std::cout << "cost: " << cost << std::endl;
665  std::cout << "previousCost: " << previousCost << std::endl;
666  std::cout << std::setprecision(12) << "cost-previousCost: " << cost - previousCost << " ( " << epsilon_cost_opt[0] << ")" << std::endl;
667  }
668  if(!maxFeatures_opt[0])
669  ++maxFeatures;
670  else
671  break;
672  }
673  }
674  }
675  catch(...){
676  std::cout << "catched feature selection" << std::endl;
677  exit(1);
678  }
679 
680  if(verbose_opt[0])
681  cout <<"cost: " << cost << endl;
682  subset.sort();
683  for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
684  std::cout << " -b " << *lit;
685  std::cout << std::endl;
686  // if((*(lit))!=subset.back())
687  // else
688  // cout << endl;
689 
690  // *NOTE* Because svm_model contains pointers to svm_problem, you can
691  // not free the memory used by svm_problem if you are still using the
692  // svm_model produced by svm_train().
693 
694  // free(prob.y);
695  // free(prob.x);
696  // free(x_space);
697  // svm_destroy_param(&param);
698  return 0;
699 }
700