Point Cloud Library (PCL)  1.10.0-dev
cuda_async_copy.h
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2011, Willow Garage, Inc.
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer in the documentation and/or other materials provided
18  * with the distribution.
19  * * Neither the name of Willow Garage, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  * @authors: Anatoly Baksheev
37  */
38 
39 #pragma once
40 
41 #include <pcl/gpu/containers/device_array.h>
42 #include <pcl/gpu/utils/safe_call.hpp>
43 
44 namespace pcl
45 {
46  namespace gpu
47  {
48  template<class T>
49  class AsyncCopy
50  {
51  public:
52  AsyncCopy(T* ptr, std::size_t size) : ptr_(ptr)
53  {
54  cudaSafeCall( cudaHostRegister(ptr_, size, 0) );
55  cudaSafeCall( cudaStreamCreate(&stream_) );
56  }
57 
58  AsyncCopy(std::vector<T>& data) : ptr_(&data[0])
59  {
60  cudaSafeCall( cudaHostRegister(ptr_, data.size(), 0) );
61  cudaSafeCall( cudaStreamCreate(&stream_) );
62  }
63 
65  {
66  cudaSafeCall( cudaHostUnregister(ptr_) );
67  cudaSafeCall( cudaStreamDestroy(stream_) );
68  }
69 
70  void download(const DeviceArray<T>& arr)
71  {
72  cudaSafeCall( cudaMemcpyAsync(ptr_, arr.ptr(), arr.sizeBytes(), cudaMemcpyDeviceToHost, stream_) );
73  }
74 
75  void download(const DeviceArray2D<T>& arr)
76  {
77  cudaSafeCall( cudaMemcpy2DAsync(ptr_, arr.cols(), arr.ptr(), arr.step(), arr.colsBytes(), arr.rows(), cudaMemcpyDeviceToHost, stream_) );
78  }
79 
80  void upload(const DeviceArray<T>& arr) const
81  {
82  cudaSafeCall( cudaMemcpyAsync(arr.ptr(), ptr_, arr.size(), cudaMemcpyHostToDevice, stream_) );
83  }
84 
85  void upload(const DeviceArray2D<T>& arr) const
86  {
87  cudaSafeCall( cudaMemcpy2DAsync(arr.ptr(), arr.step(), ptr_, arr.cols(), arr.colsBytes(), arr.rows(), cudaMemcpyHostToDevice, stream_) );
88  }
89 
91  {
92  cudaSafeCall( cudaStreamSynchronize(stream_) );
93  }
94  private:
95  cudaStream_t stream_;
96  T* ptr_ ;
97  };
98  }
99 
100  namespace device
101  {
102  using pcl::gpu::AsyncCopy;
103  }
104 }
std::size_t size() const
Returns size in elements.
AsyncCopy(T *ptr, std::size_t size)
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
int cols() const
Returns number of elements in each row.
DeviceArray2D class
Definition: device_array.h:153
int colsBytes() const
Returns number of bytes in each row.
T * ptr(int y=0)
Returns pointer to given row in internal buffer.
std::size_t step() const
Returns stride between two consecutive rows in bytes for internal buffer.
AsyncCopy(std::vector< T > &data)
DeviceArray class
Definition: device_array.h:56
void download(const DeviceArray2D< T > &arr)
void download(const DeviceArray< T > &arr)
void upload(const DeviceArray2D< T > &arr) const
std::size_t sizeBytes() const
void upload(const DeviceArray< T > &arr) const
int rows() const
Returns number of rows.
T * ptr()
Returns pointer for internal buffer in GPU memory.