### Some important parts of the CUDA code

// Host-side (suffix _h) buffers allocated with malloc. The pointer
// declarations and the byte counts fsize / isize are defined outside this
// excerpt.
// NOTE(review): none of the malloc results are checked for NULL here —
// confirm the surrounding code handles allocation failure.

// presumably receives the per-thread contour lengths (float) — TODO confirm
dist_h = (float*)malloc(fsize);

// presumably receives the per-thread error flags — TODO confirm
status_h = (int*)malloc(isize);

// presumably holds the first 'x' value assigned to each thread — TODO confirm
start_h = (int*)malloc(isize);

// presumably holds the last 'x' value assigned to each thread — TODO confirm
end_h = (int*)malloc(isize);

// Circle radius. The expression is evaluated in double precision and then
// narrowed to float on initialization.
float R = 10.0*sqrt(2.0);

// Number of threads used for the computation.
int Nt = 8;

`R` and `Nt` are the input variables: the radius of the circle and the number of threads required for the computation.

The host arrays track, for each thread, the contour length, the error status, and the start and end 'x' values.

/*
 * Helper: y coordinate of the circle x*x + y*y = R*R at integer x, rounded
 * to the nearest integer.
 *
 * The arithmetic is single precision throughout so that every caller gets
 * the same rounded value for the same x. The radicand is clamped at zero so
 * that an x beyond the radius yields y = 0 instead of converting NaN to int.
 * x is squared as a float to avoid signed-integer overflow in x*x.
 */
static __device__ __forceinline__ int roundedCircleY(float R, int x)
{
    const float xf = static_cast<float>(x);
    const float radicand = fmaxf(R * R - xf * xf, 0.0f);
    /* For non-negative input, roundf(s) equals floor(s + 0.5). */
    return static_cast<int>(roundf(sqrtf(radicand)));
}

/*
 * Kernel: per-thread contour length of a rasterised circular arc of radius R.
 *
 * Thread t = threadIdx.x walks the integer x values start[t] .. end[t]
 * (inclusive). At each x it rounds y = sqrt(R*R - x*x) to the nearest
 * integer and compares it with the rounded y of the previous x:
 *   - same y     : horizontal step, length 1
 *   - smaller y  : diagonal step, length sqrt(2)
 *   - larger y   : not handled; the thread's status flag is set to 1 and
 *                  no length is added for that step
 * The previous y for the first step is taken at x = start[t] - 1.
 *
 * Parameters:
 *   start, end : per-thread first and last x value (read at index t)
 *   dist       : output, accumulated contour length of thread t
 *   status     : output, 1 if thread t saw y increase, otherwise 0
 *   R          : circle radius
 *
 * Launch layout: only threadIdx.x is used for indexing, so each array needs
 * at least blockDim.x entries and the kernel is intended for one 1-D block.
 * Launching several blocks makes every block redo the same entries.
 */
__global__ void distance(int *start, int *end, float *dist, int *status, float R)
{
    const float kStraightStep = 1.0f;
    const float kDiagonalStep = 1.41421356f;   /* sqrt(2) */

    const int tid = threadIdx.x;
    const int first = start[tid];
    const int last = end[tid];

    int yold = roundedCircleY(R, first - 1);

    /* Must be floating point: an int accumulator truncates every sqrt(2)
       increment down to 1. */
    float d = 0.0f;
    int flag = 0;

    for (int k = first; k <= last; k++)
    {
        const int ynew = roundedCircleY(R, k);

        if (ynew == yold)
        {
            d += kStraightStep;
        }
        else if (ynew < yold)
        {
            d += kDiagonalStep;
        }
        else
        {
            flag = 1;   /* y increased: outside the supported arc direction */
        }

        yold = ynew;
    }

    dist[tid] = d;
    status[tid] = flag;
}

Kernel function: each thread computes the contour length of a circular segment with radius `R`.