25 #include "base/Optionpk.h"
26 #include "algorithms/ConfusionMatrix.h"
27 #include "algorithms/CostFactorySVM.h"
28 #include "algorithms/FeatureSelector.h"
29 #include "algorithms/svm.h"
30 #include "imageclasses/ImgReaderOgr.h"
// Identifiers for the feature selection strategies handled by this tool.
// NA has the value 0, which is also what a value-initialised SelectorValue holds.
// NOTE(review): presumably SFFS/SFS/SBS/BFS correspond to the "sffs", "sfs",
// "sbs" and "bfs" strings accepted by the "sm" option; the code that fills the
// string -> enum map is not visible here — confirm.
100 enum SelectorValue { NA=0, SFFS=1, SFS=2, SBS=3, BFS=4};
// Entry point of the feature selection tool. The visible code declares the
// command line options, loads training (and optionally test) samples and
// searches for a feature subset using an SVM based cost (CostFactorySVM).
257 int main(
int argc,
char *argv[])
// ---- Options describing the input data ----
// Optional test set. No default value; the help text says to leave it empty
// to cross validate on the training data only.
262 Optionpk<string> input_opt(
"i",
"input",
"input test set (leave empty to perform a cross validation based on training only)");
// Training vector file(s). No default value; main later asserts that at least
// one was given.
263 Optionpk<string> training_opt(
"t",
"training",
"training vector file. A single vector file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option).");
// Name of the attribute holding the class label (default "label").
265 Optionpk<string> label_opt(
"label",
"label",
"identifier for class label in training vector file.",
"label");
// Requested number of features; 0 (default) asks for the optimal number.
266 Optionpk<unsigned short> maxFeatures_opt(
"n",
"nf",
"number of features to select (0 to select optimal number, see also ecost option)", 0);
// Target number of samples per class; balancing only happens when > 0.
267 Optionpk<unsigned int> balance_opt(
"bal",
"balance",
"balance the input data to this number of samples for each class", 0);
// Whether balancing should randomise the input data (default true).
268 Optionpk<bool> random_opt(
"random",
"random",
"in case of balance, randomize input data",
true);
// Classes with fewer training samples than this are dropped (default 0).
269 Optionpk<int> minSize_opt(
"min",
"min",
"if number of training pixels is less then min, do not take this class into account", 0);
// Explicit list of band indices (0 based); no default value.
270 Optionpk<short> band_opt(
"b",
"band",
"band index (starting from 0, either use band option or use start to end)");
// Last band of a start/end range; 0 (default) means all bands.
// NOTE(review): declared as Optionpk<double> although it holds a band sequence
// number — confirm the intended type.
272 Optionpk<double> bend_opt(
"e",
"end",
"end band sequence number (set to 0 to include all bands)", 0);
// Per-band offset applied as (DN-offset)/scale. The short name "\0" is an
// empty C string, so only the long name is usable.
273 Optionpk<double> offset_opt(
"\0",
"offset",
"offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
// Per-band scale; per the help text 0 (default) requests automatic scaling of
// each band to [-1.0, 1.0]. Same empty short name as offset.
274 Optionpk<double> scale_opt(
"\0",
"scale",
"scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
// ---- Options controlling the feature selection ----
// Search strategy name (default "sffs").
275 Optionpk<string> selector_opt(
"sm",
"sm",
"feature selection method (sffs=sequential floating forward search,sfs=sequential forward search, sbs, sequential backward search ,bfs=brute force search)",
"sffs");
// Minimum change in cost used as stopping criterion (default 0.001).
276 Optionpk<float> epsilon_cost_opt(
"ecost",
"ecost",
"epsilon for stopping criterion in cost function to determine optimal number of features",0.001);
// ---- Options configuring the SVM used as cost function ----
// SVM formulation, given by name (default "C_SVC").
278 Optionpk<std::string> svm_type_opt(
"svmt",
"svmtype",
"type of SVM (C_SVC, nu_SVC,one_class, epsilon_SVR, nu_SVR)",
"C_SVC");
// Kernel function, given by name (default "radial").
279 Optionpk<std::string> kernel_type_opt(
"kt",
"kerneltype",
"type of kernel function (linear,polynomial,radial,sigmoid) ",
"radial");
// Kernel parameter gamma (default 1.0).
281 Optionpk<float> gamma_opt(
"g",
"gamma",
"gamma in kernel function",1.0);
// Kernel parameter coef0 (default 0).
282 Optionpk<float> coef0_opt(
"c0",
"coef0",
"coef0 in kernel function",0);
// Cost parameter C (default 1000).
283 Optionpk<float> ccost_opt(
"cc",
"ccost",
"the parameter C of C-SVC, epsilon-SVR, and nu-SVR",1000);
// Parameter nu (default 0.5).
284 Optionpk<float> nu_opt(
"nu",
"nu",
"the parameter nu of nu-SVC, one-class SVM, and nu-SVR",0.5);
// Epsilon of the epsilon-SVR loss function (default 0.1).
285 Optionpk<float> epsilon_loss_opt(
"eloss",
"eloss",
"the epsilon in loss function of epsilon-SVR",0.1);
// Kernel cache size in MB (default 100).
286 Optionpk<int> cache_opt(
"cache",
"cache",
"cache memory size in MB",100);
// Termination tolerance of the solver (default 0.001).
287 Optionpk<float> epsilon_tol_opt(
"etol",
"etol",
"the tolerance of termination criterion",0.001);
// Shrinking heuristics on/off (default false).
288 Optionpk<bool> shrinking_opt(
"shrink",
"shrink",
"whether to use the shrinking heuristics",
false);
// Probability estimates on/off (default true).
// NOTE(review): the trailing argument 2 is presumably a hide level understood
// by Optionpk — confirm against the Optionpk constructor.
289 Optionpk<bool> prob_est_opt(
"pe",
"probest",
"whether to train a SVC or SVR model for probability estimates",
true,2);
// ---- Miscellaneous options ----
// Numeric class values, expected in the same order as the class names option
// (declared outside the visible lines). No default value.
292 Optionpk<short> classvalue_opt(
"r",
"reclass",
"list of class values (use same order as in classname opt.");
// Verbosity level (default 0); same trailing argument 2 as for probest.
293 Optionpk<short> verbose_opt(
"v",
"verbose",
"set to: 0 (results only), 1 (confusion matrix), 2 (debug)",0,2);
// Assign hide level 1 to the advanced options.
// NOTE(review): the usage text printed by this program says that -h shows
// basic options only and --help shows all options, so setHide(1) presumably
// moves an option to the --help-only group — confirm against Optionpk.
// tlayer_opt, bstart_opt, kernel_degree_opt and classname_opt are declared in
// lines that are not visible in this extract.
295 tlayer_opt.setHide(1);
296 label_opt.setHide(1);
297 balance_opt.setHide(1);
298 random_opt.setHide(1);
299 minSize_opt.setHide(1);
301 bstart_opt.setHide(1);
303 offset_opt.setHide(1);
304 scale_opt.setHide(1);
305 svm_type_opt.setHide(1);
306 kernel_type_opt.setHide(1);
307 kernel_degree_opt.setHide(1);
308 gamma_opt.setHide(1);
309 coef0_opt.setHide(1);
310 ccost_opt.setHide(1);
312 epsilon_loss_opt.setHide(1);
313 cache_opt.setHide(1);
314 epsilon_tol_opt.setHide(1);
315 shrinking_opt.setHide(1);
316 prob_est_opt.setHide(1);
317 selector_opt.setHide(1);
318 epsilon_cost_opt.setHide(1);
320 classname_opt.setHide(1);
321 classvalue_opt.setHide(1);
// Parse the command line once per option. Only the result of the first call
// is stored (in doProcess, declared outside the visible lines); the return
// values of all other calls are ignored.
325 doProcess=input_opt.retrieveOption(argc,argv);
326 training_opt.retrieveOption(argc,argv);
327 maxFeatures_opt.retrieveOption(argc,argv);
328 tlayer_opt.retrieveOption(argc,argv);
329 label_opt.retrieveOption(argc,argv);
330 balance_opt.retrieveOption(argc,argv);
331 random_opt.retrieveOption(argc,argv);
332 minSize_opt.retrieveOption(argc,argv);
333 band_opt.retrieveOption(argc,argv);
334 bstart_opt.retrieveOption(argc,argv);
335 bend_opt.retrieveOption(argc,argv);
336 offset_opt.retrieveOption(argc,argv);
337 scale_opt.retrieveOption(argc,argv);
338 svm_type_opt.retrieveOption(argc,argv);
339 kernel_type_opt.retrieveOption(argc,argv);
340 kernel_degree_opt.retrieveOption(argc,argv);
341 gamma_opt.retrieveOption(argc,argv);
342 coef0_opt.retrieveOption(argc,argv);
343 ccost_opt.retrieveOption(argc,argv);
344 nu_opt.retrieveOption(argc,argv);
345 epsilon_loss_opt.retrieveOption(argc,argv);
346 cache_opt.retrieveOption(argc,argv);
347 epsilon_tol_opt.retrieveOption(argc,argv);
348 shrinking_opt.retrieveOption(argc,argv);
349 prob_est_opt.retrieveOption(argc,argv);
350 selector_opt.retrieveOption(argc,argv);
351 epsilon_cost_opt.retrieveOption(argc,argv);
352 cv_opt.retrieveOption(argc,argv);
353 classname_opt.retrieveOption(argc,argv);
354 classvalue_opt.retrieveOption(argc,argv);
355 verbose_opt.retrieveOption(argc,argv);
// Handler for a thrown std::string: the text is printed to standard output.
// NOTE(review): the exception is caught by value, which copies the string;
// catching by const reference would avoid the copy. What happens after the
// print (exit or continue) is not visible here.
357 catch(
string predefinedString){
358 std::cout << predefinedString << std::endl;
// Short usage reminder. The condition under which it is printed is not
// visible in this extract.
363 cout <<
"Usage: pkfssvm -t training -n number" << endl;
365 std::cout <<
"short option -h shows basic options only, use long option --help to show all options" << std::endl;
// Build the SVM cost function from the first value of every SVM related
// option, the cross validation option and the verbosity level.
369 CostFactorySVM costfactory(svm_type_opt[0], kernel_type_opt[0], kernel_degree_opt[0], gamma_opt[0], coef0_opt[0], ccost_opt[0], nu_opt[0], epsilon_loss_opt[0], cache_opt[0], epsilon_tol_opt[0], shrinking_opt[0], prob_est_opt[0], cv_opt[0], verbose_opt[0]);
// A training file is mandatory.
371 assert(training_opt.size());
// NOTE(review): any condition guarding this call is not visible here;
// presumably cross validation is switched off when a separate test set is
// supplied — confirm.
373 costfactory.setCv(0);
// Echo the main settings when verbosity is at least 1.
374 if(verbose_opt[0]>=1){
// NOTE(review): input_opt has no default value and may be empty according to
// its help text; confirm that a size check (not visible here) guards this
// element access.
376 std::cout <<
"input filename: " << input_opt[0] << std::endl;
377 std::cout <<
"training vector file: " << std::endl;
378 for(
int ifile=0;ifile<training_opt.size();++ifile)
379 std::cout << training_opt[ifile] << std::endl;
380 std::cout <<
"verbose: " << verbose_opt[0] << std::endl;
// Lookup table from selector name to SelectorValue; it is filled in lines not
// visible in this extract.
383 static std::map<std::string, SelectorValue> selMap;
// Running totals of the samples read from the training and the test set.
390 unsigned int totalSamples=0;
391 unsigned int totalTestSamples=0;
// Number of classes kept for training; set once the samples have been read.
393 unsigned short nclass=0;
// Put the requested band indices in ascending order.
411 std::sort(band_opt.begin(),band_opt.end());
// When class names are given, each needs a matching class value; register
// every (name, value) pair with the cost factory.
413 if(classname_opt.size()){
414 assert(classname_opt.size()==classvalue_opt.size());
415 for(
int iclass=0;iclass<classname_opt.size();++iclass)
416 costfactory.setClassValueMap(classname_opt[iclass],classvalue_opt[iclass]);
// Per-band normalisation parameters, filled in once the band count is known.
420 vector<double> offset;
421 vector<double> scale;
// Samples grouped per class, in the order the classes are accepted.
422 vector< Vector2d<float> > trainingPixels;
423 vector< Vector2d<float> > testPixels;
// Samples keyed by class name, as delivered by the vector file reader.
424 map<string,Vector2d<float> > trainingMap;
425 map<string,Vector2d<float> > testMap;
// Field names; passed to the reader calls below.
426 vector<string> fields;
// Read the training samples and, when a test set was given, the test samples.
430 trainingPixels.clear();
432 if(verbose_opt[0]>=1)
433 std::cout <<
"reading training file " << training_opt[0] << std::endl;
// Variant 1: bands selected through the explicit band list (band_opt).
// trainingReader and inputReader are declared in lines not visible here, as is
// the condition that chooses between variant 1 and variant 2.
437 totalSamples=trainingReader.readDataImageOgr(trainingMap,fields,band_opt,label_opt[0],tlayer_opt,verbose_opt[0]);
438 if(input_opt.size()){
440 totalTestSamples=inputReader.readDataImageOgr(testMap,fields,band_opt,label_opt[0],tlayer_opt,verbose_opt[0]);
// Variant 2: bands selected through a start/end range (bstart_opt, bend_opt).
445 totalSamples=trainingReader.readDataImageOgr(trainingMap,fields,bstart_opt[0],bend_opt[0],label_opt[0],tlayer_opt,verbose_opt[0]);
446 if(input_opt.size()){
448 totalTestSamples=inputReader.readDataImageOgr(testMap,fields,bstart_opt[0],bend_opt[0],label_opt[0],tlayer_opt,verbose_opt[0]);
// At least two classes are required in the training set ...
452 if(trainingMap.size()<2){
453 string errorstring=
"Error: could not read at least two classes from training input file";
// ... and in the test set, when one is used.
// NOTE(review): how errorstring is reported (presumably thrown) is not
// visible in this extract.
456 if(input_opt.size()&&testMap.size()<2){
457 string errorstring=
"Error: could not read at least two classes from test input file";
460 trainingReader.close();
// Error reporting of the handlers around the read. The catch clauses that
// belong to the first and last message are not visible here.
463 cerr << error << std::endl;
466 catch(std::exception& e){
467 std::cerr <<
"Error: ";
468 std::cerr << e.what() << std::endl;
// Also print the last error message recorded by the CPL error facility.
469 std::cerr << CPLGetLastErrorMsg() << std::endl;
473 cerr <<
"error catched" << std::endl;
// Collect the training samples per class and drop classes that are too small.
483 std::cout <<
"training pixels: " << std::endl;
// Walk over all classes read from the training file (class name -> samples).
484 map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
485 while(mapit!=trainingMap.end()){
// Remove classes with fewer samples than the "min" option.
487 if((mapit->second).size()<minSize_opt[0]){
// Post-increment so that mapit already refers to the next element when the
// current one is erased. Erasing through mapit itself invalidates it, and the
// loop condition and the accesses below would then use a dangling iterator.
// NOTE(review): the statement that skips the rest of the iteration for a
// removed class is not visible in this extract.
488 trainingMap.erase(mapit++);
// Class is kept: remember its name and copy its samples.
491 costfactory.pushBackName(mapit->first);
492 trainingPixels.push_back(mapit->second);
494 std::cout << mapit->first <<
": " << (mapit->second).size() <<
" samples" << std::endl;
// Number of classes that survived the minimum size filter.
497 nclass=trainingPixels.size();
498 if(classname_opt.size())
499 assert(nclass==classname_opt.size());
// Band count: length of the first sample of the first class minus two.
// NOTE(review): the two leading entries are presumably the sample coordinates
// — confirm. This access is out of range if trainingPixels is empty.
500 nband=trainingPixels[0][0].size()-2;
// Same walk for the test set.
502 mapit=testMap.begin();
503 while(mapit!=testMap.end()){
// With a class value map in place, every test class name must map to a value
// greater than 0; otherwise an error is reported.
504 if(costfactory.getClassValueMap().size()){
507 if((costfactory.getClassValueMap())[mapit->first]>0){
511 std::cerr <<
"Error: names in classname option are not complete, please check names in test vector and make sure classvalue is > 0" << std::endl;
516 testPixels.push_back(mapit->second);
518 std::cout << mapit->first <<
": " << (mapit->second).size() <<
" samples" << std::endl;
// When a test set is used it must have the same number of classes and the same
// number of bands as the training set.
521 if(input_opt.size()){
522 assert(nclass==testPixels.size());
// Compare, do not assign: the previous "nband=..." overwrote nband with the
// test set band count and only failed when that count was zero.
523 assert(nband==testPixels[0][0].size()-2);
// Optional balancing: bring every class to exactly balance_opt[0] samples.
530 if(balance_opt[0]>0){
534 for(
int iclass=0;iclass<nclass;++iclass){
// Too many samples: remove randomly chosen ones until the target is reached.
535 if(trainingPixels[iclass].size()>balance_opt[0]){
536 while(trainingPixels[iclass].size()>balance_opt[0]){
537 int index=rand()%trainingPixels[iclass].size();
538 trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
// Too few samples: duplicate randomly chosen original samples. oldsize keeps
// the draw restricted to the samples present before duplication started.
// NOTE(review): rand()%oldsize is undefined for a class without samples;
// confirm that empty classes cannot reach this point.
542 int oldsize=trainingPixels[iclass].size();
543 for(
int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
544 int index = rand()%oldsize;
545 trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
548 totalSamples+=trainingPixels[iclass].size();
// NOTE(review): this only holds if totalSamples is reset to 0 before the loop;
// that reset is not visible in this extract — confirm.
550 assert(totalSamples==nclass*balance_opt[0]);
// Determine offset and scale for every band.
554 offset.resize(nband);
// A list with more than one value must provide one value per band.
556 if(offset_opt.size()>1)
557 assert(offset_opt.size()==nband);
558 if(scale_opt.size()>1)
559 assert(scale_opt.size()==nband);
560 for(
int iband=0;iband<nband;++iband){
562 std::cout <<
"scaling for band" << iband << std::endl;
// A single option value is used for all bands, otherwise the per-band value.
563 offset[iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
564 scale[iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
// Automatic scaling: find the extreme training values of this band.
// startBand is declared outside the visible lines, as is the condition that
// enables this branch (the help text of the scale option suggests scale 0).
567 float theMin=trainingPixels[0][0][iband+startBand];
568 float theMax=trainingPixels[0][0][iband+startBand];
569 for(
int iclass=0;iclass<nclass;++iclass){
570 for(
int isample=0;isample<trainingPixels[iclass].size();++isample){
571 if(theMin>trainingPixels[iclass][isample][iband+startBand])
572 theMin=trainingPixels[iclass][isample][iband+startBand];
573 if(theMax<trainingPixels[iclass][isample][iband+startBand])
574 theMax=trainingPixels[iclass][isample][iband+startBand];
// Centre on the mid-range and scale by the half-range, so that theMin maps to
// -1 and theMax to +1.
// NOTE(review): for a constant band (theMax==theMin) the scale becomes 0 and
// the divisions by scale further down are divisions by zero.
577 offset[iband]=theMin+(theMax-theMin)/2.0;
578 scale[iband]=(theMax-theMin)/2.0;
579 if(verbose_opt[0]>1){
580 std::cout <<
"Extreme image values for band " << iband <<
": [" << theMin <<
"," << theMax <<
"]" << std::endl;
581 std::cout <<
"Using offset, scale: " << offset[iband] <<
", " << scale[iband] << std::endl;
582 std::cout <<
"scaled values for band " << iband <<
": [" << (theMin-offset[iband])/scale[iband] <<
"," << (theMax-offset[iband])/scale[iband] <<
"]" << std::endl;
// Summary of the problem size when verbosity is at least 1.
594 if(verbose_opt[0]>=1){
595 std::cout <<
"number of bands: " << nband << std::endl;
596 std::cout <<
"number of classes: " << nclass << std::endl;
// Register the class names with the cost factory. nameVector is a copy of the
// names pushed while the training classes were collected.
604 vector<string> nameVector=costfactory.getNameVector();
605 for(
int iname=0;iname<nameVector.size();++iname){
// Without a class value map the names themselves are used as class names.
606 if(costfactory.getClassValueMap().empty())
607 costfactory.pushBackClassName(nameVector[iname]);
// Otherwise the mapped value, converted to a string, is used as class name.
// It is only added when getClassIndex reports a negative index for it.
609 else if(costfactory.getClassIndex(type2string<short>((costfactory.getClassValueMap())[nameVector[iname]]))<0)
610 costfactory.pushBackClassName(type2string<short>((costfactory.getClassValueMap())[nameVector[iname]]));
// Build the normalised feature matrix: for each class the training samples
// come first, followed by the test samples of the same class index.
616 vector<unsigned int> nctraining;
617 vector<unsigned int> nctest;
// resize value-initialises the counters, so classes without test samples keep
// a test count of 0.
618 nctraining.resize(nclass);
619 nctest.resize(nclass);
620 vector< Vector2d<float> > trainingFeatures(nclass);
621 for(
int iclass=0;iclass<nclass;++iclass){
622 if(verbose_opt[0]>=1)
623 std::cout <<
"calculating features for class " << iclass << std::endl;
624 nctraining[iclass]=trainingPixels[iclass].size();
625 if(verbose_opt[0]>=1)
626 std::cout <<
"nctraining[" << iclass <<
"]: " << nctraining[iclass] << std::endl;
// Test samples exist only when a test set was read.
627 if(testPixels.size()>iclass){
628 nctest[iclass]=testPixels[iclass].size();
629 if(verbose_opt[0]>=1){
630 std::cout <<
"nctest[" << iclass <<
"]: " << nctest[iclass] << std::endl;
// One row per training sample plus one row per test sample.
636 trainingFeatures[iclass].resize(nctraining[iclass]+nctest[iclass]);
// Training rows: append (value-offset)/scale for every band.
637 for(
int isample=0;isample<nctraining[iclass];++isample){
639 for(
int iband=0;iband<nband;++iband){
640 assert(trainingPixels[iclass].size()>isample);
641 assert(trainingPixels[iclass][isample].size()>iband+startBand);
642 assert(offset.size()>iband);
643 assert(scale.size()>iband);
644 float value=trainingPixels[iclass][isample][iband+startBand];
645 trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
// Test rows: same normalisation, stored after the training rows.
648 for(
int isample=0;isample<nctest[iclass];++isample){
650 for(
int iband=0;iband<nband;++iband){
651 assert(testPixels[iclass].size()>isample);
652 assert(testPixels[iclass][isample].size()>iband+startBand);
653 assert(offset.size()>iband);
654 assert(scale.size()>iband);
655 float value=testPixels[iclass][isample][iband+startBand];
657 trainingFeatures[iclass][nctraining[iclass]+isample].push_back((value-offset[iband])/scale[iband]);
660 assert(trainingFeatures[iclass].size()==nctraining[iclass]+nctest[iclass]);
// Tell the cost factory how many rows per class are training and test rows.
663 costfactory.setNcTraining(nctraining);
664 costfactory.setNcTest(nctest);
// Feature selection. nFeatures is the number of features per sample.
665 int nFeatures=trainingFeatures[0][0].size();
// With nf==0 (optimal number requested) the search starts with one feature.
666 int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : 1;
667 double previousCost=-1;
// Nothing to select when all features are requested: use every feature and
// evaluate the cost once. subset, cost and selector are declared in lines not
// visible in this extract.
672 if(maxFeatures>=nFeatures){
674 for(
int ifeature=0;ifeature<nFeatures;++ifeature)
675 subset.push_back(ifeature);
676 cost=costfactory.getCost(trainingFeatures);
// Repeat the search while the cost still changes by at least ecost.
// NOTE(review): how previousCost and maxFeatures are updated between
// iterations is not visible here.
679 while(fabs(cost-previousCost)>=epsilon_cost_opt[0]){
// Dispatch on the selector name. operator[] yields NA (0) for names that are
// not in selMap. The case labels are not visible; presumably the calls below
// belong to SFFS, SFS, SBS and BFS in that order — confirm.
681 switch(selMap[selector_opt[0]]){
684 cost=selector.floating(trainingFeatures,costfactory,subset,maxFeatures,epsilon_cost_opt[0],verbose_opt[0]);
687 cost=selector.forward(trainingFeatures,costfactory,subset,maxFeatures,verbose_opt[0]);
690 cost=selector.backward(trainingFeatures,costfactory,subset,maxFeatures,verbose_opt[0]);
694 cost=selector.bruteForce(trainingFeatures,costfactory,subset,maxFeatures,verbose_opt[0]);
697 std::cout <<
"Error: selector not supported, please use sffs, sfs, sbs or bfs" << std::endl;
// Debug output of the convergence check.
701 if(verbose_opt[0]>1){
702 std::cout <<
"cost: " << cost << std::endl;
703 std::cout <<
"previousCost: " << previousCost << std::endl;
704 std::cout << std::setprecision(12) <<
"cost-previousCost: " << cost - previousCost <<
" ( " << epsilon_cost_opt[0] <<
")" << std::endl;
// Taken when the optimal number of features was requested (nf==0); the
// statement it guards is not visible in this extract.
706 if(!maxFeatures_opt[0])
// Message printed by an exception handler around the selection (handler
// header not visible here).
714 std::cout <<
"catched feature selection" << std::endl;
// Report the final cost and the selected features as a list of " -b <index>"
// arguments on one line.
719 cout <<
"cost: " << cost << endl;
721 for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
722 std::cout <<
" -b " << *lit;
723 std::cout << std::endl;