admin管理员组文章数量:1399737
I have a struct in CUDA that contains other structs (substructs, and also these substructs has substructs themself) and pointers to dynamically allocated memory. I want to copy the entire struct, including its substructures, from device to host using cudaMemcpy. How can I do this correctly without causing shallow copies or memory issues?
I tried to copy the whole main struct but when I try to print substruct values, it returns null data. For example
gpuErrchk(cudaMemcpy(h_population, d_population, sizeof(Population), cudaMemcpyDeviceToHost));
Network* h_networks = (Network*)malloc(sizeof(Network) * population_size);
gpuErrchk(cudaMemcpy(h_networks, d_networks, sizeof(Network) * population_size,cudaMemcpyDeviceToHost));
h_population->Networks = h_networks;
std::cout << "Num Connections: " << h_population->Networks[0].Connections[13].weight << std::endl;
There isn't any output being printed.
I tried all the pointer call combinations ((&, ), (&, &), ( , &), ( , ) ) but none of them worked. Also suggested solution link is saying Doing so means you only have to copy the array to the device, not the structure. (may not be efficient for complex structures)
but ofc commentors didn't read it.
Structs;
struct Connection {
int innovationid;
int from;
int to;
float weight;
bool enabled;
};
struct Neuron {
int type;
float input_sum;
float bias;
float output;
};
struct Network {
Connection* Connections;
Neuron* Neurons;
int num_connections;
int num_neurons;
float fitness;
};
struct Population {
Network* Networks;
int num_networks;
int generation_id;
};
Kernel function
__global__ void CreateBasePopulation(Population* pop, int pop_num, int input_num, int output_num) {
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx >= pop_num) return;
Network* net = &pop->Networks[idx];
net->num_neurons = input_num + output_num;
net->num_connections = input_num * output_num;
net->fitness = 0.0f;
curandState state;
curand_init(clock64(), idx, 0, &state);
cudaMalloc(&(net->Neurons), sizeof(Neuron) * net->num_neurons);
cudaMalloc(&(net->Connections), sizeof(Connection) * net->num_connections);
for (int i = 0; i < output_num; ++i) {
net->Neurons[i].type = 2;
net->Neurons[i].bias = ((2.0f * sqrtf((float)input_num) * curand_uniform(&state)) - sqrtf((float)input_num)) / output_num;
net->Neurons[i].output = 0.0f;
net->Neurons[i].input_sum = 0.0f;
}
for (int i = 0; i < input_num; ++i) {
net->Neurons[i].type = 0;
net->Neurons[i].bias = 0.0f;
net->Neurons[i].output = 0.0f;
net->Neurons[i].input_sum = 0.0f;
for (int j = 0; j < output_num; ++j) {
int offset = j + (output_num * i);
net->Connections[offset].from = i;
net->Connections[offset].to = j;
net->Connections[offset].innovationid = offset;
net->Connections[offset].enabled = true;
net->Connections[offset].weight = (2.0f * curand_uniform(&state)) - 1.0f;
}
}
}
Memory allocation and memcpy steps at host
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
printf("GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort)
exit(code);
}
}
int main() {
int population_size = 1024;
int input_num = 390;
int output_num = 3;
size_t heap_size_needed = population_size * ((input_num + output_num) * sizeof(Neuron) +
input_num * output_num * sizeof(Connection));
size_t heap_size = heap_size_needed + heap_size_needed /5;
gpuErrchk(cudaDeviceSetLimit(cudaLimitMallocHeapSize, heap_size));
Population* h_population = (Population*)malloc(sizeof(Population));
if (!h_population) {
std::cerr << "Failed to allocate host memory for population!" << std::endl;
return -1;
}
h_population->num_networks = population_size;
h_population->generation_id = 1;
Population* d_population;
gpuErrchk(cudaMalloc(&d_population, sizeof(Population)));
Network* d_networks;
gpuErrchk(cudaMalloc(&d_networks, sizeof(Network) * population_size));
gpuErrchk(cudaMemcpy(&(h_population->Networks), &d_networks, sizeof(Network*), cudaMemcpyHostToHost));
gpuErrchk(cudaMemcpy(d_population, h_population, sizeof(Population), cudaMemcpyHostToDevice));
int threadsPerBlock = 512;
int blocks = (population_size + threadsPerBlock - 1) / threadsPerBlock;
CreateBasePopulation<<<blocks, threadsPerBlock>>>(d_population, population_size, input_num, output_num);
gpuErrchk(cudaGetLastError());
gpuErrchk(cudaDeviceSynchronize());
gpuErrchk(cudaMemcpy(h_population, d_population, sizeof(Population), cudaMemcpyDeviceToHost));
Network* h_networks = (Network*)malloc(sizeof(Network) * population_size);
gpuErrchk(cudaMemcpy(h_networks, d_networks, sizeof(Network) * population_size, cudaMemcpyDeviceToHost));
h_population->Networks = h_networks;
for (int i = 0; i < population_size; i++) {
//Connection* d_connections;
//gpuErrchk(cudaMemcpy(&d_connections, &(d_networks[i].Connections), sizeof(Connection*), cudaMemcpyDeviceToHost));
int num_connections = h_networks[i].num_connections;
Connection* d_connections = nullptr;
gpuErrchk(cudaMemcpy(&d_connections, &(d_networks[i].Connections), sizeof(Connection*), cudaMemcpyDeviceToHost));
if (d_connections != nullptr) {
int num_connections = h_networks[i].num_connections;
Connection* h_connections = (Connection*)malloc(sizeof(Connection) * num_connections);
gpuErrchk(cudaMemcpy(h_connections, d_connections, sizeof(Connection) * num_connections, cudaMemcpyDeviceToHost));
h_networks[i].Connections = h_connections;
} else {
std::cerr << "Error: d_connections is a null pointer" << std::endl;
}
}
std::cout << "Population created successfully!\n";
std::cout << "Num Connections (GPU to CPU memcpy): " << h_population->Networks[12].Connections[13].weight << std::endl;
gpuErrchk(cudaFree(d_networks));
gpuErrchk(cudaFree(d_population));
free(h_population);
return 0;
}
Error output
GPUassert: invalid argument e.cu 68
Line 68
gpuErrchk(cudaMemcpy(h_connections, d_connections, sizeof(Connection) * num_connections, cudaMemcpyDeviceToHost));
本文标签: cCUDA copy substructsStack Overflow
版权声明:本文标题:c++ - CUDA copy substructs - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1744123901a2591866.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论