Point Cloud Library (PCL)  1.9.1-dev
tree_train.h
1 /* *************************************************
2  *
3  * Copyright (2011) Willow Garage
4  *
5  * Author : Cedric Cagniart
6  * ************************************************* */
7 
8 #pragma once
9 
10 #include "tree.h"
11 #include <boost/array.hpp>
12 
13 namespace pcl
14 {
15  namespace gpu
16  {
17  namespace people
18  {
19  namespace trees
20  {
21  // ################################################
22  // ################################################
23  // histogram stuff
24  class Histogram : public boost::array<uint32_t,NUMLABELS> {
25  public :
26  inline Histogram() { std::fill(begin(), end(), 0); }
27  };
28 
29  struct HistogramPair {
30  public :
31  // accumulate on the histograms
32  inline void accumTrue(const Label label ) {
33  m_h_true[label]++;
34  }
35  inline void accumFalse(const Label label ) {
36  m_h_false[label]++;
37  }
38 
39  inline Histogram& h_false() { return m_h_false; }
40  inline Histogram& h_true() { return m_h_true; }
41 
42  inline const Histogram h_false() const { return m_h_false; }
43  inline const Histogram h_true() const { return m_h_true; }
44 
45  protected :
48  };
49 
50  // ###############################################
51  // ###############################################
52  // SplitPoint
53  struct SplitPoint{
54  inline SplitPoint( int ai, Attrib t):attribId(ai), threshold(t){}
55  int attribId;
57  };
58 
59  // ###############################################
60  // ###############################################
61  // Data Structures as stored in binary files
62  struct LabeledAttrib {
63  inline LabeledAttrib(){}
64  inline LabeledAttrib( const Label& label, const Attrib& attrib): l(label), a(attrib){}
67  };
68 
69  // this is only going to be a helper structure
70  struct LabeledFeature { // : boost::noncopyable {
71  // constructors
72  inline LabeledFeature(): l(NOLABEL){
73  }
74  inline LabeledFeature( const LabeledFeature& B){
75  l = B.l;
76  std::copy( B.attribs, B.attribs + NUMATTRIBS, attribs );
77  }
78  Label l; // WARNING the compiler will pad here
79  Attrib attribs[NUMATTRIBS];
80  };
81 
82 
83  // compute the number of elements
84  static inline uint64_t numElements( const Histogram& h ) {
85  uint64_t Ntotal = 0;
86  for(int li=0;li<NUMLABELS;++li) Ntotal += uint64_t(h[li]);
87  return Ntotal;
88  }
89 
90  /**
91  * This is cool
92  */
93  static inline double entropy( const Histogram& h ) {
94  double Ntotal = numElements(h);
95  double entropy = 0.;
96  for(int li=0;li<NUMLABELS;++li) {
97  if( h[li] != 0 ) {
98  double p = double(h[li]) / Ntotal;
99  entropy -= p*std::log(p);
100  }
101  }
102  return entropy;
103  }
104 
105  /**
106  * This is a little weird.. it will just compute the entropy of the merged histograms
107  */
108  static inline double entropy_merged( const HistogramPair& hp ) {
109  const Histogram& htrue = hp.h_true();
110  const Histogram& hfalse = hp.h_false();
111 
112  double Ntotal = numElements(htrue) + numElements(hfalse);
113  double entropy = 0.;
114  for(int li=0;li<NUMLABELS;++li) {
115  uint64_t Ni = uint64_t(htrue[li]) + uint64_t(hfalse[li]);
116  if( Ni != 0) {
117  double p = double(Ni) / Ntotal;
118  entropy -= p*std::log(p);
119  }
120  }
121  return entropy;
122  }
123 
124  /**
125  * This will compute the gain in information resulting from the split
126  */
127  static inline double informationGain( const HistogramPair& hp) {
128  double e0 = entropy_merged(hp);
129  double etrue = entropy(hp.h_true());
130  double efalse = entropy(hp.h_false());
131 
132  double Ntrue = numElements(hp.h_true());
133  double Nfalse = numElements(hp.h_false());
134  double Ntotal = Ntrue + Nfalse;
135 
136  // lets avoid division by 0
137  if( Ntotal == 0 ) return 0.;
138  return e0 - (Ntrue/Ntotal)*etrue - (Nfalse/Ntotal)*efalse;
139  }
140 
141  // #########################################
142  // #########################################
143  // Reading and writing histograms
144  static inline std::ostream& operator << (std::ostream& os, const Histogram& h) {
145  for(int li=0;li<NUMLABELS;++li) os<< h[li]<<" ";
146  os<<"\n";
147  return os;
148  }
149 
150  static inline std::istream& operator >> (std::istream& is, Histogram& h) {
151  for(int li=0;li<NUMLABELS;++li) is >> h[li];
152  return is;
153  }
154 
155  // #######################################
156  // #######################################
157  // reading and writing histogram Pairs
158  static inline std::ostream& operator << ( std::ostream& os, const HistogramPair& hp) {
159  os << hp.h_false();
160  os << hp.h_true();
161  return os;
162  }
163 
164  static inline std::istream& operator >> ( std::istream& is, HistogramPair& hp) {
165  is >> hp.h_false();
166  is >> hp.h_true();
167  return is;
168  }
169 
170  // #########################################
171  // #########################################
172  // Reading and writing LabeledFeature Vectors ( label + collection of attrib )
173  static void writeLabeledFeatureVec( std::ostream& os, const std::vector<LabeledFeature>& lfs ){
174  os.write( (const char*)&lfs[0], sizeof(LabeledFeature)*lfs.size() );
175  }
176 
177  // static void readLabeledFeature( std::istream& is, LabeledFeature& lf)
178  // {
179  // is.read( (char*)&lf, sizeof(LabeledFeature) );
180  // if( is.fail() ) throw std::runtime_error();
181  // }
182 
183  // #######################################
184  // #######################################
185  // reading and writing split points
186  inline std::ostream& operator << ( std::ostream& os, const SplitPoint& sp){
187  os<<sp.attribId<<" "<<sp.threshold<<"\n";
188  return os;
189  }
190 
191  inline std::istream& operator >> ( std::istream& is, SplitPoint& sp){
192  is >> sp.attribId >> sp.threshold;
193  return is;
194  }
195 
196  // #######################################
197  // #######################################
198  // reading and writing info files
199  inline void writeInfoFile( const std::string& filename,
200  int attribId,
201  Attrib threshold,
202  double gain,
203  const HistogramPair& HP){
204  std::ofstream fout(filename.c_str() );
205  if( !fout.is_open() ) throw std::runtime_error(std::string("(E) could not open ") + filename );
206 
207  fout<<int(attribId)<<" "<<int(threshold)<<"\n";
208  fout<<gain<<"\n";
209  fout<<HP;
210  }
211 
212  inline void readInfoFile( const std::string& filename,
213  int& attribId,
214  Attrib& threshold,
215  double& gain,
216  HistogramPair& HP ) {
217  std::ifstream fin(filename.c_str() );
218  if( !fin.is_open() ) throw std::runtime_error(std::string("(E) could not open") + filename );
219 
220  fin>>attribId >>threshold>>gain>>HP;
221  if( fin.fail() ) throw std::runtime_error(std::string("(E) malformed splitInfo file ") + filename );
222  }
223 
224 
225  } // end namespace trees
226  } // end namespace people
227  } // end namespace gpu
228 } // end namespace pcl
std::ostream & operator<<(std::ostream &os, const AttribLocation &aloc)
Definition: tree.h:106
const Histogram h_false() const
Definition: tree_train.h:42
std::istream & operator>>(std::istream &is, AttribLocation &aloc)
Definition: tree.h:107
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
const Histogram h_true() const
Definition: tree_train.h:43
static uint64_t numElements(const Histogram &h)
Definition: tree_train.h:84
static double informationGain(const HistogramPair &hp)
This will compute the gain in information resulting from the split.
Definition: tree_train.h:127
static void writeLabeledFeatureVec(std::ostream &os, const std::vector< LabeledFeature > &lfs)
Definition: tree_train.h:173
void accumTrue(const Label label)
Definition: tree_train.h:32
void readInfoFile(const std::string &filename, int &attribId, Attrib &threshold, double &gain, HistogramPair &HP)
Definition: tree_train.h:212
LabeledFeature(const LabeledFeature &B)
Definition: tree_train.h:74
static double entropy_merged(const HistogramPair &hp)
This is a little weird.
Definition: tree_train.h:108
LabeledAttrib(const Label &label, const Attrib &attrib)
Definition: tree_train.h:64
void accumFalse(const Label label)
Definition: tree_train.h:35
void writeInfoFile(const std::string &filename, int attribId, Attrib threshold, double gain, const HistogramPair &HP)
Definition: tree_train.h:199
Definition: norms.h:54
static double entropy(const Histogram &h)
This is cool.
Definition: tree_train.h:93