/*
Copyright (c) 2011 Paul Richmond, University of Sheffield , UK;
all rights reserved unless otherwise stated.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
In addition to the regulations of the GNU General Public License,
publications and communications based in parts on this program or on
parts of this program are required to cite the article
"Democratic population decisions result in robust policy-gradient
learning: a parametric study with GPU simulations" by Paul Richmond,
Lars Buesing, Michele Giugliano and Eleni Vasilaki, PLoS ONE Neuroscience,
Under Review..
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
*/
/*
This CUDA header file provides the necessary device functions for an
algorithm implementing random number generation on the GPU. The
algorithm is based upon that described in the article "Harvesting
graphics power for MD simulations" by J.A. van Meel, A. Arnold,
D. Frenkel, S. F. Portegies Zwart and R. G. Belleman, Molecular Simulation,
Vol. 34, p. 259 (2007) distributed under the GNU GPL v2.
*/
#ifndef _RANDOM_CUDA_HEADER_
#define _RANDOM_CUDA_HEADER_
__device__ static uint2 RNG_rand48_iterate_single(rand48seeds Xn, magic_numbers mn)
{
unsigned int R0, R1;
// low 24-bit multiplication
const unsigned int lo00 = __umul24(Xn.x, mn.x);
const unsigned int hi00 = __umulhi(Xn.x, mn.x);
// 24bit distribution of 32bit multiplication results
R0 = (lo00 & 0xFFFFFF);
R1 = (lo00 >> 24) | (hi00 << 8);
R0 += mn.z; R1 += mn.w;
// transfer overflows
R1 += (R0 >> 24);
R0 &= 0xFFFFFF;
// cross-terms, low/hi 24-bit multiplication
R1 += __umul24(Xn.y, mn.x);
R1 += __umul24(Xn.x, mn.y);
R1 &= 0xFFFFFF;
return make_uint2(R0, R1);
}
__device__ float rand48(rand48seeds* seeds,magic_numbers mn, int width)
{
int index = threadIdx.x + blockIdx.x*blockDim.x;
index += blockIdx.y*width;
uint2 state = seeds[index];
int rand = ( state.x >> 17 ) | ( state.y << 7);
// this actually iterates the RNG
state = RNG_rand48_iterate_single(state, mn);
seeds[index] = state;
return (float)rand/2147483647;
}
#endif