SHOGUN  v3.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules
Histogram.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/lib/common.h>
15 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
// Constructor fragment: chains to the CDistribution base constructor and
// allocates the histogram table.
// NOTE(review): the declarator line that precedes this initializer list
// (listing line 20) is absent from this rendering - confirm against the
// original Histogram.cpp.
21 : CDistribution()
22 {
    // 1<<16 = 65536 float64_t slots: one per value a uint16_t symbol can take
    // (train() indexes hist with uint16_t symbols).
    // Presumably SG_CALLOC zero-fills like calloc - TODO confirm.
23  hist=SG_CALLOC(float64_t, 1<<16);
24 }
25 
// Constructor fragment: allocates the histogram table and stores the
// feature object passed in as `f`.
// NOTE(review): the declarator line that declares `f` (listing line 26) is
// absent from this rendering - confirm its type against the original file.
27 : CDistribution()
28 {
    // Same 65536-entry table as the default constructor.
29  hist=SG_CALLOC(float64_t, 1<<16);
    // Direct pointer assignment; train() uses set_features(data) instead.
    // NOTE(review): if set_features() does extra bookkeeping (e.g. reference
    // counting), this assignment bypasses it - confirm whether that is intended.
30  features=f;
31 }
32 
// Destructor body. The cross-reference index of this listing places
// ~CHistogram() at Histogram.cpp:33, i.e. the (missing) line just above
// this opening brace.
34 {
    // Releases the table held in hist (allocated in the constructors and
    // re-allocated in set_histogram()).
35  SG_FREE(hist);
36 }
37 
// Body of train(CFeatures* data=NULL); the cross-reference index of this
// listing places its signature at Histogram.cpp:38 (line missing here).
//
// Counts how often each 16-bit symbol occurs over all feature vectors and
// then replaces every count by its logarithm. Always returns true when it
// reaches the end.
39 {
40  int32_t vec;
41  int32_t feat;
42  int32_t i;
43 
    // Optional new training data: must be string features of word type.
44  if (data)
45  {
46  if (data->get_feature_class() != C_STRING ||
47  data->get_feature_type() != F_WORD)
48  {
    // NOTE(review): presumably SG_ERROR does not return (throws/aborts);
    // otherwise set_features(data) below would still run - confirm in SGIO.h.
49  SG_ERROR("Expected features of class string type word\n")
50  }
51  set_features(data);
52  }
53 
    // NOTE(review): listing lines 54-56 are missing from this rendering.
57 
    // Reset all 65536 counters before counting.
58  for (i=0; i< (int32_t) (1<<16); i++)
59  hist[i]=0;
60 
61  for (vec=0; vec<features->get_num_vectors(); vec++)
62  {
63  int32_t len;
64  bool free_vec;
65 
    // features is treated as CStringFeatures<uint16_t>; len and free_vec
    // are filled in by get_feature_vector().
66  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
67  get_feature_vector(vec, len, free_vec);
68 
    // Each symbol value is used directly as the index into hist.
69  for (feat=0; feat<len ; feat++)
70  hist[vector[feat]]++;
71 
    // NOTE(review): listing line 72 is missing; it presumably carried the
    // object expression this call is made on - confirm.
73  free_feature_vector(vector, vec, free_vec);
74  }
75 
    // Raw counts are converted to logs without any normalisation; entries
    // whose count stayed 0 become log(0).
76  for (i=0; i< (int32_t) (1<<16); i++)
77  hist[i]=log(hist[i]);
78 
79  return true;
80 }
81 
// Body of get_log_likelihood_example(int32_t num_example); the
// cross-reference index of this listing places its signature at
// Histogram.cpp:82 (line missing here).
//
// Returns the sum of hist[s] over every symbol s of feature vector
// num_example. hist holds log values after train(), so the sum is a
// log-space score.
83 {
    // NOTE(review): listing lines 84-86 are missing from this rendering.
87 
88  int32_t len;
89  bool free_vec;
90  float64_t loglik=0;
91 
    // features is treated as CStringFeatures<uint16_t>; len and free_vec
    // are filled in by get_feature_vector().
92  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
93  get_feature_vector(num_example, len, free_vec);
94 
95  for (int32_t i=0; i<len; i++)
96  loglik+=hist[vector[i]];
97 
    // NOTE(review): listing line 98 is missing; it presumably carried the
    // object expression this call is made on - confirm.
99  free_feature_vector(vector, num_example, free_vec);
100 
101  return loglik;
102 }
103 
/**
 * Log-derivative value for histogram entry num_param on example num_example.
 *
 * @param num_param index into hist; also the symbol value that is counted
 *        in the example (not range-checked in the visible code)
 * @param num_example index of the feature vector fetched from features
 * @return -CMath::INFTY if hist[num_param] is below CMath::ALMOST_NEG_INFTY
 *         or if the example never contains num_param; otherwise the sum of
 *         hist over the example's symbols, plus log(number of occurrences
 *         of num_param), minus hist[num_param]
 */
104 float64_t CHistogram::get_log_derivative(int32_t num_param, int32_t num_example)
105 {
    // hist stores log values (see train()), so an entry below the
    // "almost negative infinity" threshold short-circuits to -INFTY.
106  if (hist[num_param] < CMath::ALMOST_NEG_INFTY)
107  return -CMath::INFTY;
108  else
109  {
    // NOTE(review): listing lines 110-112 are missing from this rendering.
113 
114  int32_t len;
115  bool free_vec;
116  float64_t deriv=0;
117 
    // features is treated as CStringFeatures<uint16_t>; len and free_vec
    // are filled in by get_feature_vector().
118  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
119  get_feature_vector(num_example, len, free_vec);
120 
121  int32_t num_occurences=0;
122 
    // One pass: accumulate the stored log value of every symbol and count
    // how many symbols equal num_param.
123  for (int32_t i=0; i<len; i++)
124  {
125  deriv+=hist[vector[i]];
126 
127  if (vector[i]==num_param)
128  num_occurences++;
129  }
130 
    // NOTE(review): listing line 131 is missing; it presumably carried the
    // object expression this call is made on - confirm.
132  free_feature_vector(vector, num_example, free_vec);
133 
134  if (num_occurences>0)
135  deriv+=CMath::log((float64_t) num_occurences)-hist[num_param];
136  else
    // Symbol absent from this example: the accumulated sum is discarded.
137  deriv=-CMath::INFTY;
138 
139  return deriv;
140  }
141 }
142 
// Body of get_log_model_parameter(int32_t num_param); the cross-reference
// index of this listing places its signature at Histogram.cpp:143 (line
// missing here).
144 {
    // Returns the stored table entry as-is; num_param is not range-checked.
145  return hist[num_param];
146 }
147 
// Body of set_histogram(const SGVector<float64_t> histogram); the
// cross-reference index of this listing places its signature at
// Histogram.cpp:148 (line missing here).
//
// Replaces the internal table with a copy of the supplied vector and
// returns true.
149 {
    // The supplied vector must have exactly get_num_model_parameters() entries.
150  ASSERT(histogram.vlen==get_num_model_parameters())
151 
    // Drop the old table, allocate a new one of the same (asserted) length
    // and copy element by element; the caller's vector is not retained.
152  SG_FREE(hist);
153  hist=SG_MALLOC(float64_t, histogram.vlen);
154  for (int32_t i=0; i<histogram.vlen; i++)
155  hist[i]=histogram.vector[i];
156 
157  return true;
158 }
159 
// Body of get_histogram(), which the cross-reference index of this listing
// declares as returning SGVector<float64_t> and places at Histogram.cpp:160.
// NOTE(review): both the signature line and the body statement (listing
// line 162) are missing from this rendering - consult the original file.
161 {
163 }
164 
virtual void set_features(CFeatures *f)
Definition: Distribution.h:157
static const float64_t INFTY
infinity
Definition: Math.h:1330
virtual bool set_histogram(const SGVector< float64_t > histogram)
Definition: Histogram.cpp:148
virtual int32_t get_num_vectors() const =0
#define SG_ERROR(...)
Definition: SGIO.h:131
Base class Distribution from which all methods implementing a distribution are derived.
Definition: Distribution.h:41
virtual SGVector< float64_t > get_histogram()
Definition: Histogram.cpp:160
static const float64_t ALMOST_NEG_INFTY
almost negative (log) infinity
Definition: Math.h:1334
index_t vlen
Definition: SGVector.h:706
#define ASSERT(x)
Definition: SGIO.h:203
double float64_t
Definition: common.h:48
virtual EFeatureClass get_feature_class() const =0
virtual float64_t get_log_model_parameter(int32_t num_param)
Definition: Histogram.cpp:143
virtual ~CHistogram()
Definition: Histogram.cpp:33
float64_t * hist
Definition: Histogram.h:96
virtual bool train(CFeatures *data=NULL)
Definition: Histogram.cpp:38
all classes and functions are contained in the shogun namespace
Definition: class_list.h:16
The class Features is the base class of all feature objects.
Definition: Features.h:62
static float64_t log(float64_t v)
Definition: Math.h:420
virtual float64_t get_log_likelihood_example(int32_t num_example)
Definition: Histogram.cpp:82
virtual int32_t get_num_model_parameters()
Definition: Histogram.h:53
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
Definition: Histogram.cpp:104
virtual EFeatureType get_feature_type() const =0

SHOGUN Machine Learning Toolbox - Documentation