pktools  2.6.3
Processing Kernel for geospatial data
pkstat.cc
1 /**********************************************************************
2 pkstat.cc: program to calculate basic statistics from raster dataset
3 Copyright (C) 2008-2015 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <iostream>
21 #include <fstream>
22 #include <math.h>
23 #include "base/Optionpk.h"
24 #include "algorithms/StatFactory.h"
25 #include "algorithms/ImgRegression.h"
26 /******************************************************************************/
78 using namespace std;
79 
80 int main(int argc, char *argv[])
81 {
82  Optionpk<string> input_opt("i","input","name of the input raster dataset");
83  Optionpk<unsigned short> band_opt("b","band","band(s) on which to calculate statistics",0);
84  Optionpk<bool> filename_opt("f", "filename", "Shows image filename ", false);
85  Optionpk<bool> stat_opt("stats", "statistics", "Shows basic statistics (min,max, mean and stdDev of the raster datasets)", false);
86  Optionpk<double> ulx_opt("ulx", "ulx", "Upper left x value bounding box");
87  Optionpk<double> uly_opt("uly", "uly", "Upper left y value bounding box");
88  Optionpk<double> lrx_opt("lrx", "lrx", "Lower right x value bounding box");
89  Optionpk<double> lry_opt("lry", "lry", "Lower right y value bounding box");
90  Optionpk<double> nodata_opt("nodata","nodata","Set nodata value(s)");
91  Optionpk<short> down_opt("down", "down", "Down sampling factor (for raster sample datasets only). Can be used to create grid points", 1);
92  Optionpk<unsigned int> random_opt("rnd", "rnd", "generate random numbers", 0);
93  Optionpk<double> scale_opt("scale", "scale", "Scale(s) for reading input image(s)");
94  Optionpk<double> offset_opt("offset", "offset", "Offset(s) for reading input image(s)");
95 
96  // Optionpk<bool> transpose_opt("t","transpose","transpose output",false);
97  // Optionpk<std::string> randdist_opt("dist", "dist", "distribution for generating random numbers, see http://www.gnu.org/software/gsl/manual/gsl-ref_toc.html#TOC320 (only uniform and Gaussian supported yet)", "gaussian");
98  // Optionpk<double> randa_opt("rnda", "rnda", "first parameter for random distribution (mean value in case of Gaussian)", 0);
99  // Optionpk<double> randb_opt("rndb", "rndb", "second parameter for random distribution (standard deviation in case of Gaussian)", 1);
100  Optionpk<bool> mean_opt("mean","mean","calculate mean",false);
101  Optionpk<bool> median_opt("median","median","calculate median",false);
102  Optionpk<bool> var_opt("var","var","calculate variance",false);
103  Optionpk<bool> skewness_opt("skew","skewness","calculate skewness",false);
104  Optionpk<bool> kurtosis_opt("kurt","kurtosis","calculate kurtosis",false);
105  Optionpk<bool> stdev_opt("stdev","stdev","calculate standard deviation",false);
106  Optionpk<bool> sum_opt("sum","sum","calculate sum of column",false);
107  Optionpk<bool> minmax_opt("mm","minmax","calculate minimum and maximum value",false);
108  Optionpk<bool> min_opt("min","min","calculate minimum value",false);
109  Optionpk<bool> max_opt("max","max","calculate maximum value",false);
110  Optionpk<double> src_min_opt("src_min","src_min","start reading source from this minimum value");
111  Optionpk<double> src_max_opt("src_max","src_max","stop reading source from this maximum value");
112  Optionpk<bool> histogram_opt("hist","hist","calculate histogram",false);
113  Optionpk<bool> histogram2d_opt("hist2d","hist2d","calculate 2-dimensional histogram based on two images",false);
114  Optionpk<short> nbin_opt("nbin","nbin","number of bins to calculate histogram");
115  Optionpk<bool> relative_opt("rel","relative","use percentiles for histogram to calculate histogram",false);
116  Optionpk<bool> kde_opt("kde","kde","Use Kernel density estimation when producing histogram. The standard deviation is estimated based on Silverman's rule of thumb",false);
117  Optionpk<bool> correlation_opt("cor","correlation","calculate Pearson produc-moment correlation coefficient between two raster datasets (defined by -c <col1> -c <col2>)",false);
118  Optionpk<bool> rmse_opt("rmse","rmse","calculate root mean square error between two raster datasets",false);
119  Optionpk<bool> reg_opt("reg","regression","calculate linear regression between two raster datasets and get correlation coefficient",false);
120  Optionpk<bool> regerr_opt("regerr","regerr","calculate linear regression between two raster datasets and get root mean square error",false);
121  Optionpk<bool> preg_opt("preg","preg","calculate perpendicular regression between two raster datasets and get correlation coefficient",false);
122  Optionpk<short> verbose_opt("v", "verbose", "verbose mode when positive", 0,2);
123  ulx_opt.setHide(1);
124  uly_opt.setHide(1);
125  lrx_opt.setHide(1);
126  lry_opt.setHide(1);
127  down_opt.setHide(1);
128  random_opt.setHide(1);
129  scale_opt.setHide(1);
130  offset_opt.setHide(1);
131  src_min_opt.setHide(1);
132  src_max_opt.setHide(1);
133  kde_opt.setHide(1);
134 
135  // range_opt.setHide(1);
136  // transpose_opt.setHide(1);
137 
138  bool doProcess;//stop process when program was invoked with help option (-h --help)
139  try{
140  //mandatory options
141  doProcess=input_opt.retrieveOption(argc,argv);
142  //optional options
143  band_opt.retrieveOption(argc,argv);
144  filename_opt.retrieveOption(argc,argv);
145  stat_opt.retrieveOption(argc,argv);
146  nodata_opt.retrieveOption(argc,argv);
147  mean_opt.retrieveOption(argc,argv);
148  median_opt.retrieveOption(argc,argv);
149  var_opt.retrieveOption(argc,argv);
150  stdev_opt.retrieveOption(argc,argv);
151  minmax_opt.retrieveOption(argc,argv);
152  min_opt.retrieveOption(argc,argv);
153  max_opt.retrieveOption(argc,argv);
154  histogram_opt.retrieveOption(argc,argv);
155  nbin_opt.retrieveOption(argc,argv);
156  relative_opt.retrieveOption(argc,argv);
157  histogram2d_opt.retrieveOption(argc,argv);
158  correlation_opt.retrieveOption(argc,argv);
159  rmse_opt.retrieveOption(argc,argv);
160  reg_opt.retrieveOption(argc,argv);
161  regerr_opt.retrieveOption(argc,argv);
162  preg_opt.retrieveOption(argc,argv);
163  //advanced options
164  ulx_opt.retrieveOption(argc,argv);
165  uly_opt.retrieveOption(argc,argv);
166  lrx_opt.retrieveOption(argc,argv);
167  lry_opt.retrieveOption(argc,argv);
168  down_opt.retrieveOption(argc,argv);
169  random_opt.retrieveOption(argc,argv);
170  scale_opt.retrieveOption(argc,argv);
171  offset_opt.retrieveOption(argc,argv);
172  src_min_opt.retrieveOption(argc,argv);
173  src_max_opt.retrieveOption(argc,argv);
174  kde_opt.retrieveOption(argc,argv);
175  verbose_opt.retrieveOption(argc,argv);
176  }
177  catch(string predefinedString){
178  std::cout << predefinedString << std::endl;
179  exit(0);
180  }
181  if(!doProcess){
182  cout << endl;
183  cout << "Usage: pkstat -i input" << endl;
184  cout << endl;
185  std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
186  exit(0);//help was invoked, stop processing
187  }
188 
189  if(src_min_opt.size()){
190  while(src_min_opt.size()<band_opt.size())
191  src_min_opt.push_back(src_min_opt[0]);
192  }
193  if(src_max_opt.size()){
194  while(src_max_opt.size()<band_opt.size())
195  src_max_opt.push_back(src_max_opt[0]);
196  }
197 
198  unsigned int nbin=0;
199  double minX=0;
200  double minY=0;
201  double maxX=0;
202  double maxY=0;
203  double minValue=0;
204  double maxValue=0;
205  double meanValue=0;
206  double stdDev=0;
207 
208  const char* pszMessage;
209  void* pProgressArg=NULL;
210  GDALProgressFunc pfnProgress=GDALTermProgress;
211  double progress=0;
212  srand(time(NULL));
213 
216  std::vector<double> histogramOutput;
217  double nsample=0;
218 
219  ImgReaderGdal imgReader;
220 
221  if(scale_opt.size()){
222  while(scale_opt.size()<input_opt.size())
223  scale_opt.push_back(scale_opt[0]);
224  }
225  if(offset_opt.size()){
226  while(offset_opt.size()<input_opt.size())
227  offset_opt.push_back(offset_opt[0]);
228  }
229  if(input_opt.empty()){
230  std::cerr << "No image dataset provided (use option -i). Use --help for help information";
231  exit(0);
232  }
233  for(int ifile=0;ifile<input_opt.size();++ifile){
234  try{
235  imgReader.open(input_opt[ifile]);
236  }
237  catch(std::string errorstring){
238  std::cout << errorstring << std::endl;
239  exit(0);
240  }
241 
242  if(filename_opt[0])
243  std::cout << " --input " << input_opt[ifile] << " ";
244 
245  for(int inodata=0;inodata<nodata_opt.size();++inodata)
246  imgReader.pushNoDataValue(nodata_opt[inodata]);
247 
248  int nband=band_opt.size();
249  for(int iband=0;iband<nband;++iband){
250 
251  for(int inodata=0;inodata<nodata_opt.size();++inodata){
252  if(!inodata)
253  imgReader.GDALSetNoDataValue(nodata_opt[0],iband);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
254  }
255 
256  if(offset_opt.size()>ifile)
257  imgReader.setOffset(offset_opt[ifile],band_opt[iband]);
258  if(scale_opt.size()>ifile)
259  imgReader.setScale(scale_opt[ifile],band_opt[iband]);
260 
261  // if(stat_opt[0]||mean_opt[0]||var_opt[0]||stdev_opt[0]){
262  // assert(band_opt[iband]<imgReader.nrOfBand());
263  // GDALProgressFunc pfnProgress;
264  // void* pProgressData;
265  // GDALRasterBand* rasterBand;
266  // rasterBand=imgReader.getRasterBand(band_opt[iband]);
267  // rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev,pfnProgress,pProgressData);
268 
269  // if(mean_opt[0])
270  // std::cout << "--mean " << meanValue << " ";
271  // if(stdev_opt[0])
272  // std::cout << "--stdDev " << stdDev << " ";
273  // if(var_opt[0])
274  // std::cout << "--var " << stdDev*stdDev << " ";
275  // if(stat_opt[0])
276  // std::cout << "-min " << minValue << " -max " << maxValue << " --mean " << meanValue << " --stdDev " << stdDev << " ";
277  // }
278 
279  if(minmax_opt[0]||min_opt[0]||max_opt[0]){
280  assert(band_opt[iband]<imgReader.nrOfBand());
281 
282  if((ulx_opt.size()||uly_opt.size()||lrx_opt.size()||lry_opt.size())&&(imgReader.covers(ulx_opt[0],uly_opt[0],lrx_opt[0],lry_opt[0]))){
283  double uli,ulj,lri,lrj;
284  imgReader.geo2image(ulx_opt[0],uly_opt[0],uli,ulj);
285  imgReader.geo2image(lrx_opt[0],lry_opt[0],lri,lrj);
286  imgReader.getMinMax(static_cast<int>(uli),static_cast<int>(lri),static_cast<int>(ulj),static_cast<int>(lrj),band_opt[iband],minValue,maxValue);
287  }
288  else{
289  imgReader.getMinMax(minValue,maxValue,band_opt[iband],true);
290  }
291  if(minmax_opt[0])
292  std::cout << "-min " << minValue << " -max " << maxValue << " ";
293  else{
294  if(min_opt[0])
295  std::cout << "-min " << minValue << " ";
296  if(max_opt[0])
297  std::cout << "-max " << maxValue << " ";
298  }
299  }
300  }
301  if(histogram_opt[0]){//aggregate results from multiple inputs, but only calculate for first selected band
302  assert(band_opt[0]<imgReader.nrOfBand());
303  nbin=(nbin_opt.size())? nbin_opt[0]:0;
304 
305  imgReader.getMinMax(minValue,maxValue,band_opt[0]);
306  if(src_min_opt.size())
307  minValue=src_min_opt[0];
308  if(src_max_opt.size())
309  maxValue=src_max_opt[0];
310  if(minValue>=maxValue)
311  imgReader.getMinMax(minValue,maxValue,band_opt[0]);
312 
313  if(verbose_opt[0])
314  cout << "number of valid pixels in image: " << imgReader.getNvalid(band_opt[0]) << endl;
315 
316  nsample+=imgReader.getHistogram(histogramOutput,minValue,maxValue,nbin,band_opt[0],kde_opt[0]);
317 
318  //only output for last input file
319  if(ifile==input_opt.size()-1){
320  std::cout.precision(10);
321  for(int bin=0;bin<nbin;++bin){
322  double binValue=0;
323  if(nbin==maxValue-minValue+1)
324  binValue=minValue+bin;
325  else
326  binValue=minValue+static_cast<double>(maxValue-minValue)*(bin+0.5)/nbin;
327  std::cout << binValue << " ";
328  if(relative_opt[0]||kde_opt[0])
329  std::cout << 100.0*static_cast<double>(histogramOutput[bin])/static_cast<double>(nsample) << std::endl;
330  else
331  std::cout << static_cast<double>(histogramOutput[bin]) << std::endl;
332  }
333  }
334  }
335  if(histogram2d_opt[0]&&input_opt.size()<2){
336  assert(band_opt.size()>1);
337  imgReader.getMinMax(minX,maxX,band_opt[0]);
338  imgReader.getMinMax(minY,maxY,band_opt[1]);
339  if(src_min_opt.size()){
340  minX=src_min_opt[0];
341  minY=src_min_opt[1];
342  }
343  if(src_max_opt.size()){
344  maxX=src_max_opt[0];
345  maxY=src_max_opt[1];
346  }
347  nbin=(nbin_opt.size())? nbin_opt[0]:0;
348  if(nbin<=1){
349  std::cerr << "Warning: number of bins not defined, calculating bins from min and max value" << std::endl;
350  if(minX>=maxX)
351  imgReader.getMinMax(minX,maxX,band_opt[0]);
352  if(minY>=maxY)
353  imgReader.getMinMax(minY,maxY,band_opt[1]);
354 
355  minValue=(minX<minY)? minX:minY;
356  maxValue=(maxX>maxY)? maxX:maxY;
357  if(verbose_opt[0])
358  std::cout << "min and max values: " << minValue << ", " << maxValue << std::endl;
359  nbin=maxValue-minValue+1;
360  }
361  assert(nbin>1);
362  double sigma=0;
363  //kernel density estimation as in http://en.wikipedia.org/wiki/Kernel_density_estimation
364  if(kde_opt[0]){
365  assert(band_opt[0]<imgReader.nrOfBand());
366  assert(band_opt[1]<imgReader.nrOfBand());
367  GDALProgressFunc pfnProgress;
368  void* pProgressData;
369  GDALRasterBand* rasterBand;
370  double stdDev1=0;
371  double stdDev2=0;
372  rasterBand=imgReader.getRasterBand(band_opt[0]);
373  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev1,pfnProgress,pProgressData);
374  rasterBand=imgReader.getRasterBand(band_opt[1]);
375  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev2,pfnProgress,pProgressData);
376 
377  double estimatedSize=1.0*imgReader.getNvalid(band_opt[0])/down_opt[0]/down_opt[0];
378  if(random_opt[0]>0)
379  estimatedSize*=random_opt[0]/100.0;
380  sigma=1.06*sqrt(stdDev1*stdDev2)*pow(estimatedSize,-0.2);
381  }
382  assert(nbin);
383  if(verbose_opt[0]){
384  if(sigma>0)
385  std::cout << "calculating 2d kernel density estimate with sigma " << sigma << " for bands " << band_opt[0] << " and " << band_opt[1] << std::endl;
386  else
387  std::cout << "calculating 2d histogram for bands " << band_opt[0] << " and " << band_opt[1] << std::endl;
388  std::cout << "nbin: " << nbin << std::endl;
389  }
390 
391 
392  vector< vector<double> > output;
393 
394  if(maxX<=minX)
395  imgReader.getMinMax(minX,maxX,band_opt[0]);
396  if(maxY<=minY)
397  imgReader.getMinMax(minY,maxY,band_opt[1]);
398 
399  if(maxX<=minX){
400  std::ostringstream s;
401  s<<"Error: could not calculate distribution (minX>=maxX)";
402  throw(s.str());
403  }
404  if(maxY<=minY){
405  std::ostringstream s;
406  s<<"Error: could not calculate distribution (minY>=maxY)";
407  throw(s.str());
408  }
409  output.resize(nbin);
410  for(int i=0;i<nbin;++i){
411  output[i].resize(nbin);
412  for(int j=0;j<nbin;++j)
413  output[i][j]=0;
414  }
415  int binX=0;
416  int binY=0;
417  vector<double> inputX(imgReader.nrOfCol());
418  vector<double> inputY(imgReader.nrOfCol());
419  unsigned long int nvalid=0;
420  for(int irow=0;irow<imgReader.nrOfRow();++irow){
421  if(irow%down_opt[0])
422  continue;
423  imgReader.readData(inputX,GDT_Float64,irow,band_opt[0]);
424  imgReader.readData(inputY,GDT_Float64,irow,band_opt[1]);
425  for(int icol=0;icol<imgReader.nrOfCol();++icol){
426  if(icol%down_opt[0])
427  continue;
428  if(random_opt[0]>0){
429  double p=static_cast<double>(rand())/(RAND_MAX);
430  p*=100.0;
431  if(p>random_opt[0])
432  continue;//do not select for now, go to next column
433  }
434  if(imgReader.isNoData(inputX[icol]))
435  continue;
436  if(imgReader.isNoData(inputY[icol]))
437  continue;
438  ++nvalid;
439  if(inputX[icol]>=maxX)
440  binX=nbin-1;
441  else if(inputX[icol]<=minX)
442  binX=0;
443  else
444  binX=static_cast<int>(static_cast<double>(inputX[icol]-minX)/(maxX-minX)*nbin);
445  if(inputY[icol]>=maxY)
446  binY=nbin-1;
447  else if(inputY[icol]<=minX)
448  binY=0;
449  else
450  binY=static_cast<int>(static_cast<double>(inputY[icol]-minY)/(maxY-minY)*nbin);
451  assert(binX>=0);
452  assert(binX<output.size());
453  assert(binY>=0);
454  assert(binY<output[binX].size());
455  if(sigma>0){
456  //create kde for Gaussian basis function
457  //todo: speed up by calculating first and last bin with non-zero contribution...
458  for(int ibinX=0;ibinX<nbin;++ibinX){
459  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
460  double pdfX=gsl_ran_gaussian_pdf(inputX[icol]-centerX, sigma);
461  for(int ibinY=0;ibinY<nbin;++ibinY){
462  //calculate \integral_ibinX^(ibinX+1)
463  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
464  double pdfY=gsl_ran_gaussian_pdf(inputY[icol]-centerY, sigma);
465  output[ibinX][binY]+=pdfX*pdfY;
466  }
467  }
468  }
469  else
470  ++output[binX][binY];
471  }
472  }
473  if(verbose_opt[0])
474  cout << "number of valid pixels: " << nvalid << endl;
475 
476  for(int binX=0;binX<nbin;++binX){
477  cout << endl;
478  for(int binY=0;binY<nbin;++binY){
479  double binValueX=0;
480  if(nbin==maxX-minX+1)
481  binValueX=minX+binX;
482  else
483  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
484  double binValueY=0;
485  if(nbin==maxY-minY+1)
486  binValueY=minY+binY;
487  else
488  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
489 
490  double value=static_cast<double>(output[binX][binY]);
491 
492  if(relative_opt[0])
493  value*=100.0/nvalid;
494 
495  cout << binValueX << " " << binValueY << " " << value << std::endl;
496  // double value=static_cast<double>(output[binX][binY])/nvalid;
497  // cout << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
498  }
499  }
500  }
501  if(reg_opt[0]&&input_opt.size()<2){
502  if(band_opt.size()<2)
503  continue;
504  imgreg.setDown(down_opt[0]);
505  imgreg.setThreshold(random_opt[0]);
506  double c0=0;//offset
507  double c1=1;//scale
508  double r2=imgreg.getR2(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
509  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
510  }
511  if(regerr_opt[0]&&input_opt.size()<2){
512  if(band_opt.size()<2)
513  continue;
514  imgreg.setDown(down_opt[0]);
515  imgreg.setThreshold(random_opt[0]);
516  double c0=0;//offset
517  double c1=1;//scale
518  double err=imgreg.getRMSE(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
519  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -rmse " << err << std::endl;
520  }
521  if(rmse_opt[0]&&input_opt.size()<2){
522  if(band_opt.size()<2)
523  continue;
524  imgreg.setDown(down_opt[0]);
525  imgreg.setThreshold(random_opt[0]);
526  double c0=0;//offset
527  double c1=1;//scale
528  double err=imgreg.getRMSE(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
529  std::cout << " -rmse " << err << std::endl;
530  }
531  if(preg_opt[0]&&input_opt.size()<2){
532  if(band_opt.size()<2)
533  continue;
534  imgreg.setDown(down_opt[0]);
535  imgreg.setThreshold(random_opt[0]);
536  double c0=0;//offset
537  double c1=1;//scale
538  double r2=imgreg.pgetR2(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
539  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
540  }
541  imgReader.close();
542  }
543  if(reg_opt[0]&&(input_opt.size()>1)){
544  imgreg.setDown(down_opt[0]);
545  imgreg.setThreshold(random_opt[0]);
546  double c0=0;//offset
547  double c1=1;//scale
548  while(band_opt.size()<input_opt.size())
549  band_opt.push_back(band_opt[0]);
550  if(src_min_opt.size()){
551  while(src_min_opt.size()<input_opt.size())
552  src_min_opt.push_back(src_min_opt[0]);
553  }
554  if(src_max_opt.size()){
555  while(src_max_opt.size()<input_opt.size())
556  src_max_opt.push_back(src_max_opt[0]);
557  }
558  ImgReaderGdal imgReader1(input_opt[0]);
559  ImgReaderGdal imgReader2(input_opt[1]);
560 
561  if(offset_opt.size())
562  imgReader1.setOffset(offset_opt[0],band_opt[0]);
563  if(scale_opt.size())
564  imgReader1.setScale(scale_opt[0],band_opt[0]);
565  if(offset_opt.size()>1)
566  imgReader2.setOffset(offset_opt[1],band_opt[1]);
567  if(scale_opt.size()>1)
568  imgReader2.setScale(scale_opt[1],band_opt[1]);
569 
570  for(int inodata=0;inodata<nodata_opt.size();++inodata){
571  if(!inodata){
572  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
573  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
574  }
575  imgReader1.pushNoDataValue(nodata_opt[inodata]);
576  imgReader2.pushNoDataValue(nodata_opt[inodata]);
577  }
578 
579  double r2=imgreg.getR2(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
580  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
581  imgReader1.close();
582  imgReader2.close();
583  }
584  if(preg_opt[0]&&(input_opt.size()>1)){
585  imgreg.setDown(down_opt[0]);
586  imgreg.setThreshold(random_opt[0]);
587  double c0=0;//offset
588  double c1=1;//scale
589  while(band_opt.size()<input_opt.size())
590  band_opt.push_back(band_opt[0]);
591  if(src_min_opt.size()){
592  while(src_min_opt.size()<input_opt.size())
593  src_min_opt.push_back(src_min_opt[0]);
594  }
595  if(src_max_opt.size()){
596  while(src_max_opt.size()<input_opt.size())
597  src_max_opt.push_back(src_max_opt[0]);
598  }
599  ImgReaderGdal imgReader1(input_opt[0]);
600  ImgReaderGdal imgReader2(input_opt[1]);
601 
602  if(offset_opt.size())
603  imgReader1.setOffset(offset_opt[0],band_opt[0]);
604  if(scale_opt.size())
605  imgReader1.setScale(scale_opt[0],band_opt[0]);
606  if(offset_opt.size()>1)
607  imgReader2.setOffset(offset_opt[1],band_opt[1]);
608  if(scale_opt.size()>1)
609  imgReader2.setScale(scale_opt[1],band_opt[1]);
610 
611  for(int inodata=0;inodata<nodata_opt.size();++inodata){
612  if(!inodata){
613  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
614  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
615  }
616  imgReader1.pushNoDataValue(nodata_opt[inodata]);
617  imgReader2.pushNoDataValue(nodata_opt[inodata]);
618  }
619 
620  double r2=imgreg.pgetR2(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
621  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
622  imgReader1.close();
623  imgReader2.close();
624  }
625  if(regerr_opt[0]&&(input_opt.size()>1)){
626  imgreg.setDown(down_opt[0]);
627  imgreg.setThreshold(random_opt[0]);
628  double c0=0;//offset
629  double c1=1;//scale
630  while(band_opt.size()<input_opt.size())
631  band_opt.push_back(band_opt[0]);
632  if(src_min_opt.size()){
633  while(src_min_opt.size()<input_opt.size())
634  src_min_opt.push_back(src_min_opt[0]);
635  }
636  if(src_max_opt.size()){
637  while(src_max_opt.size()<input_opt.size())
638  src_max_opt.push_back(src_max_opt[0]);
639  }
640  ImgReaderGdal imgReader1(input_opt[0]);
641  ImgReaderGdal imgReader2(input_opt[1]);
642 
643  if(offset_opt.size())
644  imgReader1.setOffset(offset_opt[0],band_opt[0]);
645  if(scale_opt.size())
646  imgReader1.setScale(scale_opt[0],band_opt[0]);
647  if(offset_opt.size()>1)
648  imgReader2.setOffset(offset_opt[1],band_opt[1]);
649  if(scale_opt.size()>1)
650  imgReader2.setScale(scale_opt[1],band_opt[1]);
651 
652  for(int inodata=0;inodata<nodata_opt.size();++inodata){
653  if(!inodata){
654  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
655  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
656  }
657  imgReader1.pushNoDataValue(nodata_opt[inodata]);
658  imgReader2.pushNoDataValue(nodata_opt[inodata]);
659  }
660 
661  double err=imgreg.getRMSE(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
662  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -rmse " << err << std::endl;
663  imgReader1.close();
664  imgReader2.close();
665  }
666  if(rmse_opt[0]&&(input_opt.size()>1)){
667  imgreg.setDown(down_opt[0]);
668  imgreg.setThreshold(random_opt[0]);
669  double c0=0;//offset
670  double c1=1;//scale
671  while(band_opt.size()<input_opt.size())
672  band_opt.push_back(band_opt[0]);
673  if(src_min_opt.size()){
674  while(src_min_opt.size()<input_opt.size())
675  src_min_opt.push_back(src_min_opt[0]);
676  }
677  if(src_max_opt.size()){
678  while(src_max_opt.size()<input_opt.size())
679  src_max_opt.push_back(src_max_opt[0]);
680  }
681  ImgReaderGdal imgReader1(input_opt[0]);
682  ImgReaderGdal imgReader2(input_opt[1]);
683 
684  if(offset_opt.size())
685  imgReader1.setOffset(offset_opt[0],band_opt[0]);
686  if(scale_opt.size())
687  imgReader1.setScale(scale_opt[0],band_opt[0]);
688  if(offset_opt.size()>1)
689  imgReader2.setOffset(offset_opt[1],band_opt[1]);
690  if(scale_opt.size()>1)
691  imgReader2.setScale(scale_opt[1],band_opt[1]);
692 
693  for(int inodata=0;inodata<nodata_opt.size();++inodata){
694  if(!inodata){
695  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
696  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
697  }
698  imgReader1.pushNoDataValue(nodata_opt[inodata]);
699  imgReader2.pushNoDataValue(nodata_opt[inodata]);
700  }
701 
702  double err=imgreg.getRMSE(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
703  std::cout << "-rmse " << err << std::endl;
704  imgReader1.close();
705  imgReader2.close();
706  }
707  if(histogram2d_opt[0]&&(input_opt.size()>1)){
708  while(band_opt.size()<input_opt.size())
709  band_opt.push_back(band_opt[0]);
710  if(src_min_opt.size()){
711  while(src_min_opt.size()<input_opt.size())
712  src_min_opt.push_back(src_min_opt[0]);
713  }
714  if(src_max_opt.size()){
715  while(src_max_opt.size()<input_opt.size())
716  src_max_opt.push_back(src_max_opt[0]);
717  }
718  ImgReaderGdal imgReader1(input_opt[0]);
719  ImgReaderGdal imgReader2(input_opt[1]);
720 
721  if(offset_opt.size())
722  imgReader1.setOffset(offset_opt[0],band_opt[0]);
723  if(scale_opt.size())
724  imgReader1.setScale(scale_opt[0],band_opt[0]);
725  if(offset_opt.size()>1)
726  imgReader2.setOffset(offset_opt[1],band_opt[1]);
727  if(scale_opt.size()>1)
728  imgReader2.setScale(scale_opt[1],band_opt[1]);
729 
730  for(int inodata=0;inodata<nodata_opt.size();++inodata){
731  if(!inodata){
732  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
733  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
734  }
735  imgReader1.pushNoDataValue(nodata_opt[inodata]);
736  imgReader2.pushNoDataValue(nodata_opt[inodata]);
737  }
738 
739  imgReader1.getMinMax(minX,maxX,band_opt[0]);
740  imgReader2.getMinMax(minY,maxY,band_opt[1]);
741 
742  if(verbose_opt[0]){
743  cout << "minX: " << minX << endl;
744  cout << "maxX: " << maxX << endl;
745  cout << "minY: " << minY << endl;
746  cout << "maxY: " << maxY << endl;
747  }
748 
749  if(src_min_opt.size()){
750  minX=src_min_opt[0];
751  minY=src_min_opt[1];
752  }
753  if(src_max_opt.size()){
754  maxX=src_max_opt[0];
755  maxY=src_max_opt[1];
756  }
757 
758  nbin=(nbin_opt.size())? nbin_opt[0]:0;
759  if(nbin<=1){
760  std::cerr << "Warning: number of bins not defined, calculating bins from min and max value" << std::endl;
761  // imgReader1.getMinMax(minX,maxX,band_opt[0]);
762  // imgReader2.getMinMax(minY,maxY,band_opt[0]);
763  if(minX>=maxX)
764  imgReader1.getMinMax(minX,maxX,band_opt[0]);
765  if(minY>=maxY)
766  imgReader2.getMinMax(minY,maxY,band_opt[1]);
767 
768  minValue=(minX<minY)? minX:minY;
769  maxValue=(maxX>maxY)? maxX:maxY;
770  if(verbose_opt[0])
771  std::cout << "min and max values: " << minValue << ", " << maxValue << std::endl;
772  nbin=maxValue-minValue+1;
773  }
774  assert(nbin>1);
775  double sigma=0;
776  //kernel density estimation as in http://en.wikipedia.org/wiki/Kernel_density_estimation
777  if(kde_opt[0]){
778  GDALProgressFunc pfnProgress;
779  void* pProgressData;
780  GDALRasterBand* rasterBand;
781  double stdDev1=0;
782  double stdDev2=0;
783  rasterBand=imgReader1.getRasterBand(band_opt[0]);
784  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev1,pfnProgress,pProgressData);
785  rasterBand=imgReader2.getRasterBand(band_opt[0]);
786  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev2,pfnProgress,pProgressData);
787 
788  //todo: think of smarter way how to estimate size (nodata!)
789  double estimatedSize=1.0*imgReader.getNvalid(band_opt[0])/down_opt[0]/down_opt[0];
790  if(random_opt[0]>0)
791  estimatedSize*=random_opt[0]/100.0;
792  sigma=1.06*sqrt(stdDev1*stdDev2)*pow(estimatedSize,-0.2);
793  }
794  assert(nbin);
795  if(verbose_opt[0]){
796  if(sigma>0)
797  std::cout << "calculating 2d kernel density estimate with sigma " << sigma << " for datasets " << input_opt[0] << " and " << input_opt[1] << std::endl;
798  else
799  std::cout << "calculating 2d histogram for datasets " << input_opt[0] << " and " << input_opt[1] << std::endl;
800  std::cout << "nbin: " << nbin << std::endl;
801  }
802 
803  vector< vector<double> > output;
804 
805  if(maxX<=minX)
806  imgReader1.getMinMax(minX,maxX,band_opt[0]);
807  if(maxY<=minY)
808  imgReader2.getMinMax(minY,maxY,band_opt[1]);
809 
810  if(maxX<=minX){
811  std::ostringstream s;
812  s<<"Error: could not calculate distribution (minX>=maxX)";
813  throw(s.str());
814  }
815  if(maxY<=minY){
816  std::ostringstream s;
817  s<<"Error: could not calculate distribution (minY>=maxY)";
818  throw(s.str());
819  }
820  if(verbose_opt[0]){
821  cout << "minX: " << minX << endl;
822  cout << "maxX: " << maxX << endl;
823  cout << "minY: " << minY << endl;
824  cout << "maxY: " << maxY << endl;
825  }
826  output.resize(nbin);
827  for(int i=0;i<nbin;++i){
828  output[i].resize(nbin);
829  for(int j=0;j<nbin;++j)
830  output[i][j]=0;
831  }
832  int binX=0;
833  int binY=0;
834  vector<double> inputX(imgReader1.nrOfCol());
835  vector<double> inputY(imgReader2.nrOfCol());
836  double nvalid=0;
837  double geoX=0;
838  double geoY=0;
839  double icol1=0;
840  double irow1=0;
841  double icol2=0;
842  double irow2=0;
843  for(int irow=0;irow<imgReader1.nrOfRow();++irow){
844  if(irow%down_opt[0])
845  continue;
846  irow1=irow;
847  imgReader1.image2geo(icol1,irow1,geoX,geoY);
848  imgReader2.geo2image(geoX,geoY,icol2,irow2);
849  irow2=static_cast<int>(irow2);
850  imgReader1.readData(inputX,GDT_Float64,irow1,band_opt[0]);
851  imgReader2.readData(inputY,GDT_Float64,irow2,band_opt[1]);
852  for(int icol=0;icol<imgReader.nrOfCol();++icol){
853  if(icol%down_opt[0])
854  continue;
855  icol1=icol;
856  if(random_opt[0]>0){
857  double p=static_cast<double>(rand())/(RAND_MAX);
858  p*=100.0;
859  if(p>random_opt[0])
860  continue;//do not select for now, go to next column
861  }
862  if(imgReader1.isNoData(inputX[icol]))
863  continue;
864  imgReader1.image2geo(icol1,irow1,geoX,geoY);
865  imgReader2.geo2image(geoX,geoY,icol2,irow2);
866  icol2=static_cast<int>(icol2);
867  if(imgReader2.isNoData(inputY[icol2]))
868  continue;
869  // ++nvalid;
870  if(inputX[icol1]>=maxX)
871  binX=nbin-1;
872  else if(inputX[icol]<=minX)
873  binX=0;
874  else
875  binX=static_cast<int>(static_cast<double>(inputX[icol1]-minX)/(maxX-minX)*nbin);
876  if(inputY[icol2]>=maxY)
877  binY=nbin-1;
878  else if(inputY[icol2]<=minY)
879  binY=0;
880  else
881  binY=static_cast<int>(static_cast<double>(inputY[icol2]-minY)/(maxY-minY)*nbin);
882  assert(binX>=0);
883  assert(binX<output.size());
884  assert(binY>=0);
885  assert(binY<output[binX].size());
886  if(sigma>0){
887  //create kde for Gaussian basis function
888  //todo: speed up by calculating first and last bin with non-zero contribution...
889  for(int ibinX=0;ibinX<nbin;++ibinX){
890  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
891  double pdfX=gsl_ran_gaussian_pdf(inputX[icol1]-centerX, sigma);
892  for(int ibinY=0;ibinY<nbin;++ibinY){
893  //calculate \integral_ibinX^(ibinX+1)
894  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
895  double pdfY=gsl_ran_gaussian_pdf(inputY[icol2]-centerY, sigma);
896  output[ibinX][binY]+=pdfX*pdfY;
897  nvalid+=pdfX*pdfY;
898  }
899  }
900  }
901  else{
902  ++output[binX][binY];
903  ++nvalid;
904  }
905  }
906  }
907  if(verbose_opt[0])
908  cout << "number of valid pixels: " << nvalid << endl;
909  for(int binX=0;binX<nbin;++binX){
910  cout << endl;
911  for(int binY=0;binY<nbin;++binY){
912  double binValueX=0;
913  if(nbin==maxX-minX+1)
914  binValueX=minX+binX;
915  else
916  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
917  double binValueY=0;
918  if(nbin==maxY-minY+1)
919  binValueY=minY+binY;
920  else
921  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
922  double value=static_cast<double>(output[binX][binY]);
923 
924  if(relative_opt[0]||kde_opt[0])
925  value*=100.0/nvalid;
926 
927  cout << binValueX << " " << binValueY << " " << value << std::endl;
928  // double value=static_cast<double>(output[binX][binY])/nvalid;
929  // cout << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
930  }
931  }
932  imgReader1.close();
933  imgReader2.close();
934  }
935 
936  if(!histogram_opt[0]||histogram2d_opt[0])
937  std::cout << std::endl;
938 }
939 
940 // int nband=(band_opt.size()) ? band_opt.size() : imgReader.nrOfBand();
941 
942 // const char* pszMessage;
943 // void* pProgressArg=NULL;
944 // GDALProgressFunc pfnProgress=GDALTermProgress;
945 // double progress=0;
946 // srand(time(NULL));
947 
948 
949 // statfactory::StatFactory stat;
950 // imgregression::ImgRegression imgreg;
951 
952 // pfnProgress(progress,pszMessage,pProgressArg);
953 // for(irow=0;irow<classReader.nrOfRow();++irow){
954 // if(irow%down_opt[0])
955 // continue;
956 // // classReader.readData(classBuffer,GDT_Int32,irow);
957 // classReader.readData(classBuffer,GDT_Float64,irow);
958 // double x,y;//geo coordinates
959 // double iimg,jimg;//image coordinates in img image
960 // for(icol=0;icol<classReader.nrOfCol();++icol){
961 // if(icol%down_opt[0])
962  // continue;
963 
964 
965  // if(rand_opt[0]>0){
966  // gsl_rng* r=stat.getRandomGenerator(time(NULL));
967  // //todo: init random number generator using time...
968  // if(verbose_opt[0])
969  // std::cout << "generating " << rand_opt[0] << " random numbers: " << std::endl;
970  // for(unsigned int i=0;i<rand_opt[0];++i)
971  // std::cout << i << " " << stat.getRandomValue(r,randdist_opt[0],randa_opt[0],randb_opt[0]) << std::endl;
972  // }
973 
974  // imgreg.setDown(down_opt[0]);
975  // imgreg.setThreshold(threshold_opt[0]);
976  // double c0=0;//offset
977  // double c1=1;//scale
978  // double err=uncertNodata_opt[0];//start with high initial value in case we do not have first ob err=imgreg.getRMSE(imgReaderModel1,imgReader,c0,c1,verbose_opt[0]);
979 
980  // int nband=band_opt.size();
981  // if(band_opt[0]<0)
982  // nband=imgReader.nrOfBand();
983  // for(int iband=0;iband<nband;++iband){
984  // unsigned short band_opt[iband]=(band_opt[0]<0)? iband : band_opt[iband];
985 
986  // if(minmax_opt[0]||min_opt[0]||max_opt[0]){
987  // assert(band_opt[iband]<imgReader.nrOfBand());
988  // if((ulx_opt.size()||uly_opt.size()||lrx_opt.size()||lry_opt.size())&&(imgReader.covers(ulx_opt[0],uly_opt[0],lrx_opt[0],lry_opt[0]))){
989  // double uli,ulj,lri,lrj;
990  // imgReader.geo2image(ulx_opt[0],uly_opt[0],uli,ulj);
991  // imgReader.geo2image(lrx_opt[0],lry_opt[0],lri,lrj);
992  // imgReader.getMinMax(static_cast<int>(uli),static_cast<int>(lri),static_cast<int>(ulj),static_cast<int>(lrj),band_opt[iband],minValue,maxValue);
993  // }
994  // else
995  // imgReader.getMinMax(minValue,maxValue,band_opt[iband],true);
996  // if(minmax_opt[0])
997  // std::cout << "-min " << minValue << " -max " << maxValue << " ";
998  // else{
999  // if(min_opt[0])
1000  // std::cout << "-min " << minValue << " ";
1001  // if(max_opt[0])
1002  // std::cout << "-max " << maxValue << " ";
1003  // }
1004  // }
1005  // }
1006  // if(relative_opt[0])
1007  // hist_opt[0]=true;
1008  // if(hist_opt[0]){
1009  // assert(band_opt[0]<imgReader.nrOfBand());
1010  // unsigned int nbin=(nbin_opt.size())? nbin_opt[0]:0;
1011  // std::vector<unsigned long int> output;
1012  // minValue=0;
1013  // maxValue=0;
1014  // //todo: optimize such that getMinMax is only called once...
1015  // imgReader.getMinMax(minValue,maxValue,band_opt[0]);
1016 
1017  // if(src_min_opt.size())
1018  // minValue=src_min_opt[0];
1019  // if(src_max_opt.size())
1020  // maxValue=src_max_opt[0];
1021  // unsigned long int nsample=imgReader.getHistogram(output,minValue,maxValue,nbin,band_opt[0]);
1022  // std::cout.precision(10);
1023  // for(int bin=0;bin<nbin;++bin){
1024  // double binValue=0;
1025  // if(nbin==maxValue-minValue+1)
1026  // binValue=minValue+bin;
1027  // else
1028  // binValue=minValue+static_cast<double>(maxValue-minValue)*(bin+0.5)/nbin;
1029  // std::cout << binValue << " ";
1030  // if(relative_opt[0])
1031  // std::cout << 100.0*static_cast<double>(output[bin])/static_cast<double>(nsample) << std::endl;
1032  // else
1033  // std::cout << static_cast<double>(output[bin]) << std::endl;
1034  // }
1035  // }