-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Read after write() but before wait_all() causes seg faults #185
Comments
Hi @wangvsa, I think the problem is with the "printf("rank %d read success: %s!\n", mpi_rank, data_in[0]);" in your code, if you change it to "printf("rank %d read success: %s!\n", mpi_rank, data_in);" then it will run ok. |
Oops, that was a mistake. I updated the code above, can you give it another try? One process is enough. |
@wangvsa, can you try the wait_all_fix branch and see if it fixes your problem? Please also double check the data is read and written correctly. |
Thanks @houjun ! That fixed the bug. However, I encountered another issue with the following pattern: Code to reproduce the issue: #include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include "mpi.h"
#include "pdc.h"
/**
* write -> read -> wait_all()
*
*/
int mpi_rank, mpi_size;
MPI_Comm mpi_comm;
void write_read_wait_all(pdcid_t obj_id, int iterations) {
pdcid_t region_local, region_remote;
pdcid_t transfer_request;
perr_t ret;
int ndim = 1;
uint64_t offset_local = 0;
uint64_t offset_remote = 0;
uint64_t chunk_size = 2880;
char* data_out = (char*) malloc(chunk_size * sizeof(char));
memset(data_out, 'a', chunk_size*sizeof(char));
pdcid_t* tids = (pdcid_t*)malloc(sizeof(pdcid_t) * (iterations));
for(int i = 0; i < iterations; i++) {
region_local = PDCregion_create(ndim, &offset_local, &chunk_size);
region_remote = PDCregion_create(ndim, &offset_remote, &chunk_size);
offset_remote += chunk_size;
tids[i] = PDCregion_transfer_create(data_out, PDC_WRITE, obj_id, region_local, region_remote);
if (tids[i] == 0)
printf("transfer request creation failed\n");
ret = PDCregion_transfer_start(tids[i]);
if (ret != SUCCEED)
printf("Failed to start transfer\n");
}
printf("rank %d read before wait_all()\n", mpi_rank);
fflush(stdout);
char* data_in = (char*) malloc(chunk_size * sizeof(char));
offset_local = 0;
offset_remote = 0;
region_local = PDCregion_create(ndim, &offset_local, &chunk_size);
region_remote = PDCregion_create(ndim, &offset_remote, &chunk_size);
pdcid_t read_tid = PDCregion_transfer_create(data_in, PDC_READ, obj_id, region_local, region_remote);
ret = PDCregion_transfer_start(read_tid);
ret = PDCregion_transfer_wait(read_tid);
ret = PDCregion_transfer_close(read_tid);
printf("rank %d read success!\n", mpi_rank);
fflush(stdout);
// Write more
int N = 10;
pdcid_t *tids2 = (pdcid_t*) malloc(sizeof(pdcid_t) * N);
offset_local = 0;
offset_remote = iterations*chunk_size; // start from the end of the last write
for(int i = 0; i < N; i++) {
region_local = PDCregion_create(ndim, &offset_local, &chunk_size);
region_remote = PDCregion_create(ndim, &offset_remote, &chunk_size);
offset_remote += chunk_size;
tids2[i] = PDCregion_transfer_create(data_out, PDC_WRITE, obj_id, region_local, region_remote);
if (tids2[i] == 0)
printf("transfer request creation failed\n");
ret = PDCregion_transfer_start(tids2[i]);
if (ret != SUCCEED)
printf("Failed to start transfer\n");
}
printf("rank %d call wait_all on tids.\n", mpi_rank);
fflush(stdout);
ret = PDCregion_transfer_wait_all(tids, (iterations));
if (ret != SUCCEED)
printf("Failed to transfer wait\n");
printf("rank %d call wait_all on tids2.\n", mpi_rank);
fflush(stdout);
ret = PDCregion_transfer_wait_all(tids2, (N));
if (ret != SUCCEED)
printf("Failed to transfer wait\n");
for(int i = 0; i < iterations; i++) {
ret = PDCregion_transfer_close(tids[i]);
if (ret != SUCCEED)
printf("region transfer close failed\n");
}
for(int i = 0; i < N; i++) {
ret = PDCregion_transfer_close(tids2[i]);
if (ret != SUCCEED)
printf("region transfer close failed\n");
}
free(data_in);
free(data_out);
free(tids);
free(tids2);
}
int main(int argc, char** argv) {
pdcid_t pdc_id, cont_prop, cont_id;
pdcid_t obj_prop, obj_id;
uint64_t dims[1] = {PDC_SIZE_UNLIMITED};
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
MPI_Comm_dup(MPI_COMM_WORLD, &mpi_comm);
// create a pdc
pdc_id = PDCinit("pdc");
// create a container property
cont_prop = PDCprop_create(PDC_CONT_CREATE, pdc_id);
if (cont_prop <= 0) {
printf("Fail to create container property @ line %d!\n", __LINE__);
}
// create a container
cont_id = PDCcont_create_col("c1", cont_prop);
if (cont_id <= 0) {
printf("Fail to create container @ line %d!\n", __LINE__);
}
// create an object property
obj_prop = PDCprop_create(PDC_OBJ_CREATE, pdc_id);
PDCprop_set_obj_dims(obj_prop, 1, dims);
PDCprop_set_obj_type(obj_prop, PDC_CHAR);
PDCprop_set_obj_time_step(obj_prop, 0);
PDCprop_set_obj_user_id(obj_prop, getuid());
PDCprop_set_obj_app_name(obj_prop, "producer");
char obj_name[100] = {0};
sprintf(obj_name, "obj-var-%d", mpi_rank);
PDCprop_set_obj_tags(obj_prop, obj_name);
obj_id = PDCobj_create(cont_id, obj_name, obj_prop);
write_read_wait_all(obj_id, 1000);
if (PDCobj_close(obj_id) < 0) {
printf("fail to close obj_id\n");
}
if (PDCprop_close(cont_prop) < 0) {
printf("Fail to close property @ line %d\n", __LINE__);
}
if (PDCclose(pdc_id) < 0) {
printf("fail to close PDC\n");
}
MPI_Finalize();
} |
In my debugging the "Attempt to wait for a transfer request that has not been started." is from PDCobj_close(), in which PDC tries to clean up the transfer requests on an object. |
@houjun could you double check if this has been fixed? |
Bug Report
I'm trying to optimize the CFITSIO PDC driver for Montage. Some Montage components perform a large number of small writes followed by a single read on the same file.
All I/O operations are file-per-process. So we don't need to perform a wait() for each write() call. We can do a wait_all() at the flush/close time to reduce overhead.
The issue is that the read occurs before wait_all() sometimes can cause seg fault.
write->write->write->...->read->write_all()
To Reproduce
I attached a simple code to help debugging. Run it on a single node with a small number of processes will trigger the seg fault.
The text was updated successfully, but these errors were encountered: