|
| 1 | +use arrayfire as af; |
| 2 | +use rustacuda::prelude::*; |
| 3 | +use rustacuda::*; |
| 4 | + |
| 5 | +use std::ffi::CString; |
| 6 | + |
| 7 | +fn main() { |
| 8 | + // MAKE SURE to do all rustacuda initilization before arrayfire API's |
| 9 | + // first call. It seems like some CUDA context state is getting messed up |
| 10 | + // if we mix CUDA context init(device, context, module, stream) with ArrayFire API |
| 11 | + match rustacuda::init(CudaFlags::empty()) { |
| 12 | + Ok(()) => {} |
| 13 | + Err(e) => panic!("rustacuda init failure: {:?}", e), |
| 14 | + } |
| 15 | + let device = match Device::get_device(0) { |
| 16 | + Ok(d) => d, |
| 17 | + Err(e) => panic!("Failed to get device: {:?}", e), |
| 18 | + }; |
| 19 | + let _context = |
| 20 | + match Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device) { |
| 21 | + Ok(c) => c, |
| 22 | + Err(e) => panic!("Failed to create context: {:?}", e), |
| 23 | + }; |
| 24 | + let ptx = CString::new(include_str!("./resources/add.ptx")).unwrap(); |
| 25 | + let module = match Module::load_from_string(&ptx) { |
| 26 | + Ok(m) => m, |
| 27 | + Err(e) => panic!("Failed to load module from string: {:?}", e), |
| 28 | + }; |
| 29 | + let stream = match Stream::new(StreamFlags::NON_BLOCKING, None) { |
| 30 | + Ok(s) => s, |
| 31 | + Err(e) => panic!("Failed to create stream: {:?}", e), |
| 32 | + }; |
| 33 | + |
| 34 | + af::set_device(0); |
| 35 | + af::info(); |
| 36 | + |
| 37 | + let num: i32 = 10; |
| 38 | + let x = af::constant(1f32, af::dim4!(10)); |
| 39 | + let y = af::constant(2f32, af::dim4!(10)); |
| 40 | + let out = af::constant(0f32, af::dim4!(10)); |
| 41 | + |
| 42 | + af::af_print!("x", x); |
| 43 | + af::af_print!("y", y); |
| 44 | + af::af_print!("out(init)", out); |
| 45 | + |
| 46 | + //TODO Figure out how to use Stream returned by ArrayFire with Rustacuda |
| 47 | + // let af_id = get_device(); |
| 48 | + // let cuda_id = get_device_native_id(af_id); |
| 49 | + // let af_cuda_stream = get_stream(cuda_id); |
| 50 | + |
| 51 | + //TODO Figure out how to use Stream returned by ArrayFire with Rustacuda |
| 52 | + // let stream = Stream {inner: mem::transmute(af_cuda_stream)}; |
| 53 | + |
| 54 | + // Run a custom CUDA kernel in the ArrayFire CUDA stream |
| 55 | + unsafe { |
| 56 | + // Obtain device pointers from ArrayFire using Array::device() method |
| 57 | + let d_x: *mut f32 = x.device_ptr() as *mut f32; |
| 58 | + let d_y: *mut f32 = y.device_ptr() as *mut f32; |
| 59 | + let d_o: *mut f32 = out.device_ptr() as *mut f32; |
| 60 | + |
| 61 | + match launch!(module.sum<<<1, 1, 0, stream>>>( |
| 62 | + memory::DevicePointer::wrap(d_x), |
| 63 | + memory::DevicePointer::wrap(d_y), |
| 64 | + memory::DevicePointer::wrap(d_o), |
| 65 | + num |
| 66 | + )) { |
| 67 | + Ok(()) => {} |
| 68 | + Err(e) => panic!("Kernel Launch failure: {:?}", e), |
| 69 | + } |
| 70 | + |
| 71 | + // wait for the kernel to finish as it is async call |
| 72 | + match stream.synchronize() { |
| 73 | + Ok(()) => {} |
| 74 | + Err(e) => panic!("Stream sync failure: {:?}", e), |
| 75 | + }; |
| 76 | + |
| 77 | + // Return control of Array memory to ArrayFire using unlock |
| 78 | + x.unlock(); |
| 79 | + y.unlock(); |
| 80 | + out.unlock(); |
| 81 | + } |
| 82 | + af::af_print!("sum after kernel launch", out); |
| 83 | +} |
0 commit comments