#include <stdio.h>
#include <iostream>
#include <math.h>
#define I 7
#define dt 0.01
#define a 0.02
#define b 0.2
#define c -65.
#define d 6.
// Set GPU parallelization
#define BLOCKS 4
#define THREADS 256
// Set simulation time
#define TIME_ITERATIONS 6000000l
__global__
void run(float *v, float *u)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
for(unsigned long t = 0; t<TIME_ITERATIONS; ++t){
v[i] += dt*(0.04f*v[i]*v[i]+5.f*v[i]+140.f-u[i]+I);
u[i] += dt*a*(b*v[i]-u[i]);
if (v[i] > 30.f){
v[i] = c;
u[i] = u[i] + d;
}
}
}
int main(void)
{
int N = BLOCKS*THREADS;
float *v, *u;
// Allocate Unified Memory – accessible from CPU or GPU
cudaMallocManaged(&v, N*sizeof(float));
cudaMallocManaged(&u, N*sizeof(float));
// initialize arrays on the host
for (int i = 0; i < N; i++) {
v[i] = -70.f;
u[i] = -14.f;
}
// Run kernel on the GPU
run<<<BLOCKS, THREADS>>>(v, u);
// Wait for GPU to finish before accessing on host
cudaDeviceSynchronize();
//check for errors
cudaError_t e = cudaGetLastError();
if(e){
printf("ERROR (%d): %s\n",e,cudaGetErrorString(e));
}
// Free memory
cudaFree(v);
cudaFree(u);
return 0;
}