diff --git a/tests/test_cpython_interface.py b/tests/test_cpython_interface.py
index 12ae4d4..3210479 100644
--- a/tests/test_cpython_interface.py
+++ b/tests/test_cpython_interface.py
@@ -142,10 +142,20 @@ def test_small_example_overrun(self):
         s = encoder.encode(0, 1024)
         minlen = len(s)
         for length in range(minlen):
-            with pytest.raises(ValueError, match="-101"):
+            with pytest.raises(_vcztools.VczBufferTooSmall, match="-101"):
                 encoder.encode(0, length)
         assert s == encoder.encode(0, minlen)
 
+    @pytest.mark.parametrize("variant", [0, 999, 333, 501])
+    def test_large_example_overrun(self, variant):
+        encoder = example_encoder(1000, 100)
+        s = encoder.encode(variant, 1024 * 1024)
+        minlen = len(s)
+        for length in range(minlen):
+            with pytest.raises(_vcztools.VczBufferTooSmall, match="-101"):
+                encoder.encode(variant, length)
+        assert s == encoder.encode(variant, minlen)
+
 
 class TestFixedFieldInputChecking:
     @pytest.mark.parametrize("name", FIXED_FIELD_NAMES)
diff --git a/vcztools/_vcztoolsmodule.c b/vcztools/_vcztoolsmodule.c
index 5cc8378..0e80f7f 100644
--- a/vcztools/_vcztoolsmodule.c
+++ b/vcztools/_vcztoolsmodule.c
@@ -17,11 +17,20 @@ typedef struct {
 } VcfEncoder;
 // clang-format on
 
+static PyObject *VczBufferTooSmall;
+
 static void
 handle_library_error(int err)
 {
-    // TODO generate string messages.
-    PyErr_Format(PyExc_ValueError, "Error occured: %d: ", err);
+    switch (err) {
+        case VCZ_ERR_BUFFER_OVERFLOW:
+            PyErr_Format(
+                VczBufferTooSmall, "Error: %d; specified buffer size is too small", err);
+            break;
+        // TODO handle the other error types.
+        default:
+            PyErr_Format(PyExc_ValueError, "Error occured: %d: ", err);
+    }
 }
 
 static FILE *
@@ -576,6 +585,19 @@ PyInit__vcztools(void)
     /* Initialise numpy */
     import_array();
 
+    VczBufferTooSmall = PyErr_NewException("_vcztools.VczBufferTooSmall", NULL, NULL);
+    if (VczBufferTooSmall == NULL) {
+        return NULL;
+    }
+    /* PyModule_AddObject steals a reference only on success: take an extra one
+     * for the static pointer and release both if registration fails. */
+    Py_INCREF(VczBufferTooSmall);
+    if (PyModule_AddObject(module, "VczBufferTooSmall", VczBufferTooSmall) < 0) {
+        Py_DECREF(VczBufferTooSmall);
+        Py_CLEAR(VczBufferTooSmall);
+        return NULL;
+    }
+
     /* VcfEncoder type */
     if (PyType_Ready(&VcfEncoderType) < 0) {
         return NULL;
diff --git a/vcztools/vcf_writer.py b/vcztools/vcf_writer.py
index b383df1..54fe78e 100644
--- a/vcztools/vcf_writer.py
+++ b/vcztools/vcf_writer.py
@@ -258,9 +258,18 @@ def c_chunk_to_vcf(root, v_chunk, contigs, filters, output):
             if len(array.shape) == 2:
                 array = array.reshape((num_variants, num_samples, 1))
             encoder.add_format_field(name, array)
-
+    # TODO: (1) make a guess at this based on number of fields and samples,
+    # and (2) log a DEBUG message when we have to double.
+    buflen = 1024
     for j in range(num_variants):
-        line = encoder.encode(j, 2**20)
+        while True:
+            try:
+                line = encoder.encode(j, buflen)
+                break
+            except _vcztools.VczBufferTooSmall:
+                # Too small for this variant: double and retry. The larger size
+                # is kept for the remaining variants.
+                buflen *= 2
         print(line, file=output)
 
 