Point Cloud Library (PCL)  1.7.1
obj_rec_ransac.h
/*
 * Software License Agreement (BSD License)
 *
 * Point Cloud Library (PCL) - www.pointclouds.org
 * Copyright (c) 2010-2012, Willow Garage, Inc.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials provided
 *    with the distribution.
 *  * Neither the name of Willow Garage, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifndef PCL_RECOGNITION_OBJ_REC_RANSAC_H_
#define PCL_RECOGNITION_OBJ_REC_RANSAC_H_

#include <pcl/recognition/ransac_based/hypothesis.h>
#include <pcl/recognition/ransac_based/model_library.h>
#include <pcl/recognition/ransac_based/rigid_transform_space.h>
#include <pcl/recognition/ransac_based/orr_octree.h>
#include <pcl/recognition/ransac_based/orr_octree_zprojection.h>
#include <pcl/recognition/ransac_based/simple_octree.h>
#include <pcl/recognition/ransac_based/trimmed_icp.h>
#include <pcl/recognition/ransac_based/orr_graph.h>
#include <pcl/recognition/ransac_based/auxiliary.h>
#include <pcl/recognition/ransac_based/bvh.h>
#include <pcl/registration/transformation_estimation_svd.h>
#include <pcl/correspondence.h>
#include <pcl/pcl_exports.h>
#include <pcl/point_cloud.h>
#include <cmath>
#include <cstring> // for memcpy
#include <string>
#include <vector>
#include <list>

#define OBJ_REC_RANSAC_VERBOSE

namespace pcl
{
  namespace recognition
  {
    /** \brief This is a RANSAC-based 3D object recognition method. Do the following to use it: (i) call addModel() k times with k different models
      * representing the objects to be recognized and (ii) call recognize() with the 3D scene in which the objects should be recognized. Recognition means both
      * object identification and pose (position + orientation) estimation. Check the method descriptions for more details.
      *
      * \note If you use this code in any academic work, please cite:
      *
      * - Chavdar Papazov, Sami Haddadin, Sven Parusel, Kai Krieger and Darius Burschka.
      *   Rigid 3D geometry matching for grasping of known objects in cluttered scenes.
      *   The International Journal of Robotics Research 2012. DOI: 10.1177/0278364911436019
      *
      * - Chavdar Papazov and Darius Burschka.
      *   An Efficient RANSAC for 3D Object Recognition in Noisy and Occluded Scenes.
      *   In Proceedings of the 10th Asian Conference on Computer Vision (ACCV'10),
      *   November 2010.
      *
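      * A minimal usage sketch (not part of the original documentation; the cloud variables and the numeric
      * parameters are illustrative placeholders only, and <iostream> is assumed to be included):
      *
      * \code
      * pcl::PointCloud<pcl::PointXYZ> model_points, scene_points; // filled elsewhere
      * pcl::PointCloud<pcl::Normal>   model_normals, scene_normals; // filled elsewhere
      *
      * pcl::recognition::ObjRecRANSAC rec (40.0f, 5.0f); // pair_width and voxel_size in scene units
      * rec.addModel (model_points, model_normals, "object_1"); // unique name per model
      *
      * std::list<pcl::recognition::ObjRecRANSAC::Output> detected;
      * rec.recognize (scene_points, scene_normals, detected, 0.99);
      *
      * std::list<pcl::recognition::ObjRecRANSAC::Output>::const_iterator it = detected.begin ();
      * for ( ; it != detected.end () ; ++it )
      *   std::cout << it->object_name_ << ": confidence " << it->match_confidence_ << std::endl;
      * \endcode
      *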
      * \author Chavdar Papazov
      * \ingroup recognition
      */
    class PCL_EXPORTS ObjRecRANSAC
    {
      public:
        typedef ModelLibrary::PointCloudIn PointCloudIn;
        typedef ModelLibrary::PointCloudN PointCloudN;

        typedef BVH<Hypothesis*> BVHH;

        /** \brief This is an output item of the ObjRecRANSAC::recognize() method. It contains the recognized model, its name (the one passed to
          * ObjRecRANSAC::addModel()), the rigid transform which aligns the model with the input scene and the match confidence which is a number
          * in the interval (0, 1] which gives the fraction of the model surface area matched to the scene. E.g., a match confidence of 0.3 means
          * that 30% of the object surface area was matched to the scene points. If the scene is represented by a single range image, the match
          * confidence can not be greater than 0.5 since the range scanner sees only one side of each object.
          */
        class Output
        {
          public:
            Output (const std::string& object_name, const float rigid_transform[12], float match_confidence, void* user_data) :
              object_name_ (object_name),
              match_confidence_ (match_confidence),
              user_data_ (user_data)
            {
              memcpy (this->rigid_transform_, rigid_transform, 12*sizeof (float));
            }
            virtual ~Output (){}

          public:
            std::string object_name_;
            float rigid_transform_[12];
            float match_confidence_;
            void* user_data_;
        };

        class OrientedPointPair
        {
          public:
            OrientedPointPair (const float *p1, const float *n1, const float *p2, const float *n2)
            : p1_ (p1), n1_ (n1), p2_ (p2), n2_ (n2)
            {
            }

            virtual ~OrientedPointPair (){}

          public:
            const float *p1_, *n1_, *p2_, *n2_;
        };

        class HypothesisCreator
        {
          public:
            HypothesisCreator (){}
            virtual ~HypothesisCreator (){}

            Hypothesis* create (const SimpleOctree<Hypothesis, HypothesisCreator, float>::Node*) const { return new Hypothesis (); }
        };

        typedef SimpleOctree<Hypothesis, HypothesisCreator, float> HypothesisOctree;

      public:
        /** \brief Constructor with some important parameters which can not be changed once an instance of that class is created.
          *
          * \param[in] pair_width should be roughly half the extent of the visible object part. This means, for each object point p there should be (at least)
          * one point q (from the same object) such that ||p - q|| <= pair_width. Tradeoff: smaller values allow for detection in more occluded scenes but lead
          * to more imprecise alignment. Bigger values lead to better alignment but require large visible object parts (i.e., less occlusion).
          *
          * \param[in] voxel_size is the size of the leaves of the octree, i.e., the "size" of the discretization. Tradeoff: high values lead to less
          * computation time but ignore object details. Small values make it easier to distinguish between objects, but introduce more holes in the resulting
          * "voxel-surface" (especially for a sparsely sampled scene). */
        ObjRecRANSAC (float pair_width, float voxel_size);
        virtual ~ObjRecRANSAC ()
        {
          this->clear ();
          this->clearTestData ();
        }

        /** \brief Removes all models from the model library and releases some memory dynamically allocated by this instance. */
        inline void
        clear ()
        {
          model_library_.removeAllModels ();
          scene_octree_.clear ();
          scene_octree_proj_.clear ();
          sampled_oriented_point_pairs_.clear ();
          transform_space_.clear ();
          scene_octree_points_.reset ();
        }

        /** \brief This is a threshold. The larger the value, the more point pairs will be considered as co-planar and will
          * be ignored in the offline model pre-processing and in the online recognition phases. This makes sense only if
          * "ignore co-planar points" is on. Call this method before calling addModel(). This method calls the corresponding
          * method of the model library. */
        inline void
        setMaxCoplanarityAngleDegrees (float max_coplanarity_angle_degrees)
        {
          max_coplanarity_angle_ = max_coplanarity_angle_degrees*AUX_DEG_TO_RADIANS;
          model_library_.setMaxCoplanarityAngleDegrees (max_coplanarity_angle_degrees);
        }

        inline void
        setSceneBoundsEnlargementFactor (float value)
        {
          scene_bounds_enlargement_factor_ = value;
        }

        /** \brief Default is on. This method calls the corresponding method of the model library. */
        inline void
        ignoreCoplanarPointPairsOn ()
        {
          ignore_coplanar_opps_ = true;
          model_library_.ignoreCoplanarPointPairsOn ();
        }

        /** \brief Default is on. This method calls the corresponding method of the model library. */
        inline void
        ignoreCoplanarPointPairsOff ()
        {
          ignore_coplanar_opps_ = false;
          model_library_.ignoreCoplanarPointPairsOff ();
        }

        inline void
        icpHypothesesRefinementOn ()
        {
          do_icp_hypotheses_refinement_ = true;
        }

        inline void
        icpHypothesesRefinementOff ()
        {
          do_icp_hypotheses_refinement_ = false;
        }

        /** \brief Add an object model to be recognized.
          *
          * \param[in] points are the object points.
          * \param[in] normals are the normals at each object point.
          * \param[in] object_name is an identifier for the object. If that object is detected in the scene, 'object_name'
          * is returned by the recognition method so that you know which object has been detected. Note that 'object_name' has
          * to be unique!
          * \param[in] user_data is a pointer to some data (can be NULL).
          *
          * The method returns true if the model was successfully added to the model library and false otherwise (e.g., if 'object_name' is already in use).
          */
        inline bool
        addModel (const PointCloudIn& points, const PointCloudN& normals, const std::string& object_name, void* user_data = NULL)
        {
          return (model_library_.addModel (points, normals, object_name, frac_of_points_for_icp_refinement_, user_data));
        }

        /** \brief This method performs the recognition of the models loaded to the model library with the method addModel().
          *
          * \param[in] scene is the 3D scene in which the objects should be recognized.
          * \param[in] normals are the scene normals.
          * \param[out] recognized_objects is the list of output items, each one containing the recognized model instance, its name, the aligning rigid transform
          * and the match confidence (see ObjRecRANSAC::Output for further explanations).
          * \param[in] success_probability is the user-defined probability of detecting all objects in the scene.
          */
        void
        recognize (const PointCloudIn& scene, const PointCloudN& normals, std::list<ObjRecRANSAC::Output>& recognized_objects, double success_probability = 0.99);

        inline void
        enterTestModeSampleOPP ()
        {
          rec_mode_ = ObjRecRANSAC::SAMPLE_OPP;
        }

        inline void
        enterTestModeTestHypotheses ()
        {
          rec_mode_ = ObjRecRANSAC::TEST_HYPOTHESES;
        }

        inline void
        leaveTestMode ()
        {
          rec_mode_ = ObjRecRANSAC::FULL_RECOGNITION;
        }

        /** \brief This function is useful for testing purposes. It returns the oriented point pairs which were sampled from the
          * scene during the recognition process. Makes sense only if some of the testing modes are active. */
        inline const std::list<ObjRecRANSAC::OrientedPointPair>&
        getSampledOrientedPointPairs () const
        {
          return (sampled_oriented_point_pairs_);
        }

        /** \brief This function is useful for testing purposes. It returns the accepted hypotheses generated during the
          * recognition process. Makes sense only if some of the testing modes are active. */
        inline const std::vector<Hypothesis>&
        getAcceptedHypotheses () const
        {
          return (accepted_hypotheses_);
        }

        /** \brief This function is useful for testing purposes. It returns the accepted hypotheses generated during the
          * recognition process. Makes sense only if some of the testing modes are active. */
        inline void
        getAcceptedHypotheses (std::vector<Hypothesis>& out) const
        {
          out = accepted_hypotheses_;
        }

        /** \brief Returns the hash table in the model library. */
        inline const pcl::recognition::ModelLibrary::HashTable&
        getHashTable () const
        {
          return (model_library_.getHashTable ());
        }

        inline const ModelLibrary&
        getModelLibrary () const
        {
          return (model_library_);
        }

        inline const ModelLibrary::Model*
        getModel (const std::string& name) const
        {
          return (model_library_.getModel (name));
        }

        inline const ORROctree&
        getSceneOctree () const
        {
          return (scene_octree_);
        }

        inline RigidTransformSpace&
        getRigidTransformSpace ()
        {
          return (transform_space_);
        }

        inline float
        getPairWidth () const
        {
          return pair_width_;
        }

      protected:
        enum Recognition_Mode {SAMPLE_OPP, TEST_HYPOTHESES, /*BUILD_CONFLICT_GRAPH,*/ FULL_RECOGNITION};
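        // Rough meaning of the modes (an added summary, inferred from the test-mode methods above): SAMPLE_OPP stops
        // after sampling oriented point pairs, TEST_HYPOTHESES stops after generating and testing hypotheses, and
        // FULL_RECOGNITION runs the complete recognition pipeline.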

        friend class ModelLibrary;

        inline int
        computeNumberOfIterations (double success_probability) const
        {
          // 'p_obj' is the probability that given that the first sample point belongs to an object,
          // the second sample point will belong to the same object
          const double p_obj = 0.25;
          // old version: p = p_obj*relative_obj_size_*fraction_of_pairs_in_hash_table_;
          const double p = p_obj*relative_obj_size_;

          if ( 1.0 - p <= 0.0 )
            return 1;

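          // Standard RANSAC bound: a single trial succeeds with probability p, so all N trials fail with
          // probability (1 - p)^N. Requiring 1 - (1 - p)^N >= success_probability and solving for N gives
          // N >= log (1 - success_probability) / log (1 - p); the "+ 1.0" below rounds this up before the
          // integer truncation.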
          return static_cast<int> (log (1.0-success_probability)/log (1.0-p) + 1.0);
        }

        inline void
        clearTestData ()
        {
          sampled_oriented_point_pairs_.clear ();
          accepted_hypotheses_.clear ();
          transform_space_.clear ();
        }

        void
        sampleOrientedPointPairs (int num_iterations, const std::vector<ORROctree::Node*>& full_scene_leaves, std::list<OrientedPointPair>& output) const;

        int
        generateHypotheses (const std::list<OrientedPointPair>& pairs, std::list<HypothesisBase>& out) const;

        /** \brief Groups close hypotheses in 'hypotheses'. Saves a representative for each group in 'grouped_hypotheses'. Returns the
          * number of hypotheses after grouping. */
        int
        groupHypotheses (std::list<HypothesisBase>& hypotheses, int num_hypotheses, RigidTransformSpace& transform_space,
                         HypothesisOctree& grouped_hypotheses) const;

        inline void
        testHypothesis (Hypothesis* hypothesis, int& match, int& penalty) const;

        inline void
        testHypothesisNormalBased (Hypothesis* hypothesis, float& match) const;

        void
        buildGraphOfCloseHypotheses (HypothesisOctree& hypotheses, ORRGraph<Hypothesis>& graph) const;

        void
        filterGraphOfCloseHypotheses (ORRGraph<Hypothesis>& graph, std::vector<Hypothesis>& out) const;

        void
        buildGraphOfConflictingHypotheses (const BVHH& bvh, ORRGraph<Hypothesis*>& graph) const;

        void
        filterGraphOfConflictingHypotheses (ORRGraph<Hypothesis*>& graph, std::list<ObjRecRANSAC::Output>& recognized_objects) const;

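        // Convention for the 12-float rigid transforms used below (and in ObjRecRANSAC::Output): the first 9 entries
        // hold the rotation R in row-major order and the last 3 hold the translation t. Applying a transform T to a
        // point x therefore gives
        //   y[i] = T[3*i + 0]*x[0] + T[3*i + 1]*x[1] + T[3*i + 2]*x[2] + T[9 + i],   i = 0, 1, 2.
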
        /** \brief Computes the rigid transform that maps the line (a1, b1) to (a2, b2).
          * The computation is based on the corresponding points 'a1' <-> 'a2' and 'b1' <-> 'b2'
          * and the normals 'a1_n', 'b1_n', 'a2_n', and 'b2_n'. The result is saved in
          * 'rigid_transform' which is an array of length 12. The first 9 elements are the
          * rotational part (row major order) and the last 3 are the translation. */
        inline void
        computeRigidTransform (
          const float *a1, const float *a1_n, const float *b1, const float* b1_n,
          const float *a2, const float *a2_n, const float *b2, const float* b2_n,
          float* rigid_transform) const
        {
          // Some local variables
          float o1[3], o2[3], x1[3], x2[3], y1[3], y2[3], z1[3], z2[3], tmp1[3], tmp2[3], Ro1[3], invFrame1[3][3];

          // Compute the origins
          o1[0] = 0.5f*(a1[0] + b1[0]);
          o1[1] = 0.5f*(a1[1] + b1[1]);
          o1[2] = 0.5f*(a1[2] + b1[2]);

          o2[0] = 0.5f*(a2[0] + b2[0]);
          o2[1] = 0.5f*(a2[1] + b2[1]);
          o2[2] = 0.5f*(a2[2] + b2[2]);

          // Compute the x-axes
          aux::diff3 (b1, a1, x1); aux::normalize3 (x1);
          aux::diff3 (b2, a2, x2); aux::normalize3 (x2);
          // Compute the y-axes. First y-axis
          aux::projectOnPlane3 (a1_n, x1, tmp1); aux::normalize3 (tmp1);
          aux::projectOnPlane3 (b1_n, x1, tmp2); aux::normalize3 (tmp2);
          aux::sum3 (tmp1, tmp2, y1); aux::normalize3 (y1);
          // Second y-axis
          aux::projectOnPlane3 (a2_n, x2, tmp1); aux::normalize3 (tmp1);
          aux::projectOnPlane3 (b2_n, x2, tmp2); aux::normalize3 (tmp2);
          aux::sum3 (tmp1, tmp2, y2); aux::normalize3 (y2);
          // Compute the z-axes
          aux::cross3 (x1, y1, z1);
          aux::cross3 (x2, y2, z2);

          // 1. Invert the matrix [x1|y1|z1] (note that x1, y1, and z1 are treated as columns!)
          invFrame1[0][0] = x1[0]; invFrame1[0][1] = x1[1]; invFrame1[0][2] = x1[2];
          invFrame1[1][0] = y1[0]; invFrame1[1][1] = y1[1]; invFrame1[1][2] = y1[2];
          invFrame1[2][0] = z1[0]; invFrame1[2][1] = z1[1]; invFrame1[2][2] = z1[2];
          // 2. Compute the desired rotation as rigid_transform = [x2|y2|z2]*invFrame1
          aux::mult3x3 (x2, y2, z2, invFrame1, rigid_transform);

          // Construct the translation as the difference between o2 and the rotated o1
          aux::mult3x3 (rigid_transform, o1, Ro1);
          rigid_transform[9]  = o2[0] - Ro1[0];
          rigid_transform[10] = o2[1] - Ro1[1];
          rigid_transform[11] = o2[2] - Ro1[2];
        }

        /** \brief Computes the signature of the oriented point pair ((p1, n1), (p2, n2)) consisting of the angles between
          * n1 and (p2-p1),
          * n2 and (p1-p2),
          * n1 and n2
          *
          * \param[out] signature is an array of three floats saving the three angles in the order shown above. */
        static inline void
        compute_oriented_point_pair_signature (const float *p1, const float *n1, const float *p2, const float *n2, float signature[3])
        {
          // Get the line from p1 to p2
          float cl[3] = {p2[0] - p1[0], p2[1] - p1[1], p2[2] - p1[2]};
          aux::normalize3 (cl);

          signature[0] = std::acos (aux::clamp (aux::dot3 (n1,cl), -1.0f, 1.0f));
          cl[0] = -cl[0]; cl[1] = -cl[1]; cl[2] = -cl[2];
          signature[1] = std::acos (aux::clamp (aux::dot3 (n2,cl), -1.0f, 1.0f));
          signature[2] = std::acos (aux::clamp (aux::dot3 (n1,n2), -1.0f, 1.0f));
        }

      protected:
        // Parameters
        float pair_width_;
        float voxel_size_;
        float position_discretization_;
        float rotation_discretization_;
        float abs_zdist_thresh_;
        float relative_obj_size_;
        float visibility_;
        float relative_num_of_illegal_pts_;
        float intersection_fraction_;
        float max_coplanarity_angle_;
        float scene_bounds_enlargement_factor_;
        bool ignore_coplanar_opps_;
        float frac_of_points_for_icp_refinement_;
        bool do_icp_hypotheses_refinement_;

        ModelLibrary model_library_;
        ORROctree scene_octree_;
        ORROctreeZProjection scene_octree_proj_;
        RigidTransformSpace transform_space_;
        TrimmedICP<pcl::PointXYZ, float> trimmed_icp_;
        PointCloudIn::Ptr scene_octree_points_;

        std::list<OrientedPointPair> sampled_oriented_point_pairs_;
        std::vector<Hypothesis> accepted_hypotheses_;
        Recognition_Mode rec_mode_;
    };
  } // namespace recognition
} // namespace pcl

#endif // PCL_RECOGNITION_OBJ_REC_RANSAC_H_