While writing JSON data with a large number of records (`dataframe.size` == 4263942) to an abfss path, I am seeing an Internal Server Error from the adlfs package while it processes the chunks of data. The error is consistent, not a transient glitch, and a bare "Internal Server Error" isn't helpful here. Could you please look into this, and if it is a valid failure scenario, could the actual underlying error message be surfaced so the failure is easier to diagnose?
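For reference, here is a minimal sketch of the failing call; the DataFrame contents and the abfss URL below are placeholders, not the exact values from my environment (credentials come from the runtime environment):

```python
import pandas as pd

# Placeholder DataFrame standing in for the real one (df.size == 4263942).
df = pd.DataFrame({"id": range(1_000_000), "value": ["x"] * 1_000_000})

# Placeholder abfss path; the real one points at our storage account.
filenameActivity = "abfss://container@account.dfs.core.windows.net/out/activity.json"

# pandas hands the abfss:// URL to fsspec/adlfs, which buffers the JSON and
# uploads it to blob storage in chunks; the failure happens when the file
# handle is closed and the final chunk is flushed.
df.to_json(filenameActivity, orient="records", lines=True)
```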
Error:

```
File ~/cluster-env/trident_env/lib/python3.10/site-packages/adlfs/spec.py:2083, in AzureBlobFile._async_upload_chunk(self, final, **kwargs)
2079 await bc.commit_block_list(
2080 block_list=block_list, metadata=self.metadata
2081 )
2082 else:
-> 2083 raise RuntimeError(f"Failed to upload block{e}!") from e
2084 elif self.mode == "ab":
2085 async with self.container_client.get_blob_client(blob=self.blob) as bc:
RuntimeError: Failed to upload blockInternal Server Error
```
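Looking at the raise at spec.py:2083, the f-string interpolates `str(e)` directly after the word "block" with no separator (hence the run-together "blockInternal Server Error") and it drops the HTTP status code and service error body entirely. A rough sketch of what a more informative message could look like; the helper below is hypothetical, and the `status_code`/`reason`/`message` attributes are those of `azure.core.exceptions.HttpResponseError`:

```python
from azure.core.exceptions import HttpResponseError


def describe_azure_error(exc: Exception) -> str:
    """Build a diagnostic string from an Azure SDK exception (hypothetical helper)."""
    if isinstance(exc, HttpResponseError):
        # Pull the HTTP status and reason off the service error when available.
        status = getattr(exc, "status_code", None)
        reason = getattr(exc, "reason", None)
        return f"HTTP {status} ({reason}): {exc.message}"
    return str(exc)


# The raise in AzureBlobFile._async_upload_chunk could then read:
#     raise RuntimeError(
#         f"Failed to upload block: {describe_azure_error(e)}"
#     ) from e
```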
Full Stacktrace:

```
RuntimeError Traceback (most recent call last)
Cell In[35], line 86
---> 86 df.to_json(filenameActivity,orient="records",lines=True)
87 print("File has been saved as", filenameActivity)
File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/core/generic.py:2650, in NDFrame.to_json(self, path_or_buf, orient, date_format, double_precision, force_ascii, date_unit, default_handler, lines, compression, index, indent, storage_options)
2647 config.is_nonnegative_int(indent)
2648 indent = indent or 0
-> 2650 return json.to_json(
2651 path_or_buf=path_or_buf,
2652 obj=self,
2653 orient=orient,
2654 date_format=date_format,
2655 double_precision=double_precision,
2656 force_ascii=force_ascii,
2657 date_unit=date_unit,
2658 default_handler=default_handler,
2659 lines=lines,
2660 compression=compression,
2661 index=index,
2662 indent=indent,
2663 storage_options=storage_options,
2664 )
File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/io/json/_json.py:178, in to_json(path_or_buf, obj, orient, date_format, double_precision, force_ascii, date_unit, default_handler, lines, compression, index, indent, storage_options)
174 s = convert_to_line_delimits(s)
176 if path_or_buf is not None:
177 # apply compression and byte/text conversion
--> 178 with get_handle(
179 path_or_buf, "w", compression=compression, storage_options=storage_options
180 ) as handles:
181 handles.handle.write(s)
182 else:
File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/io/common.py:133, in IOHandles.__exit__(self, *args)
132 def __exit__(self, *args: Any) -> None:
--> 133 self.close()
File ~/cluster-env/trident_env/lib/python3.10/site-packages/pandas/io/common.py:125, in IOHandles.close(self)
123 self.created_handles.remove(self.handle)
124 for handle in self.created_handles:
--> 125 handle.close()
126 self.created_handles = []
127 self.is_wrapped = False
File ~/cluster-env/trident_env/lib/python3.10/site-packages/adlfs/spec.py:1919, in AzureBlobFile.close(self)
1917 """Close file and azure client."""
1918 asyncio.run_coroutine_threadsafe(close_container_client(self), loop=self.loop)
-> 1919 super().close()
File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/spec.py:1789, in AbstractBufferedFile.close(self)
1787 else:
1788 if not self.forced:
-> 1789 self.flush(force=True)
1791 if self.fs is not None:
1792 self.fs.invalidate_cache(self.path)
File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/spec.py:1660, in AbstractBufferedFile.flush(self, force)
1657 self.closed = True
1658 raise
-> 1660 if self._upload_chunk(final=force) is not False:
1661 self.offset += self.buffer.seek(0, 2)
1662 self.buffer = io.BytesIO()
File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/asyn.py:115, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
112 @functools.wraps(func)
113 def wrapper(*args, **kwargs):
114 self = obj or args[0]
--> 115 return sync(self.loop, func, *args, **kwargs)
File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/asyn.py:100, in sync(loop, func, timeout, *args, **kwargs)
98 raise FSTimeoutError from return_result
99 elif isinstance(return_result, BaseException):
--> 100 raise return_result
101 else:
102 return return_result
File ~/cluster-env/trident_env/lib/python3.10/site-packages/fsspec/asyn.py:55, in _runner(event, coro, result, timeout)
53 coro = asyncio.wait_for(coro, timeout=timeout)
54 try:
---> 55 result[0] = await coro
56 except Exception as ex:
57 result[0] = ex
File ~/cluster-env/trident_env/lib/python3.10/site-packages/adlfs/spec.py:2083, in AzureBlobFile._async_upload_chunk(self, final, **kwargs)
2079 await bc.commit_block_list(
2080 block_list=block_list, metadata=self.metadata
2081 )
2082 else:
-> 2083 raise RuntimeError(f"Failed to upload block{e}!") from e
2084 elif self.mode == "ab":
2085 async with self.container_client.get_blob_client(blob=self.blob) as bc:
RuntimeError: Failed to upload blockInternal Server Error
```
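In the meantime, since the `raise ... from e` at spec.py:2083 chains the original exception, the real Azure error can be dug out of `__cause__` by reproducing the upload outside pandas. A sketch of that workaround, again with a placeholder DataFrame and path:

```python
import fsspec
import pandas as pd

# Placeholder DataFrame; the real one has ~4.2M cells.
df = pd.DataFrame({"id": range(1_000_000), "value": ["x"] * 1_000_000})

# Serialize in memory first, then drive the fsspec/adlfs upload directly
# so the wrapping RuntimeError can be caught and its cause inspected.
payload = df.to_json(orient="records", lines=True)

try:
    with fsspec.open(
        "abfss://container@account.dfs.core.windows.net/out/activity.json", "w"
    ) as f:
        f.write(payload)
except RuntimeError as err:
    # adlfs raises `RuntimeError(...) from e`, so the original
    # azure-storage-blob exception rides along as __cause__.
    print("underlying Azure error:", repr(err.__cause__))
    raise
```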