Skip to content

Commit

Permalink
orjson.Fragment
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Jun 1, 2023
1 parent 96522e8 commit dcab26b
Show file tree
Hide file tree
Showing 17 changed files with 1,328 additions and 12 deletions.
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ available in the repository.
4. [Serialize](https://github.com/ijl/orjson#serialize)
1. [default](https://github.com/ijl/orjson#default)
2. [option](https://github.com/ijl/orjson#option)
3. [Fragment](https://github.com/ijl/orjson#fragment)
5. [Deserialize](https://github.com/ijl/orjson#deserialize)
2. [Types](https://github.com/ijl/orjson#types)
1. [dataclass](https://github.com/ijl/orjson#dataclass)
Expand Down Expand Up @@ -137,10 +138,10 @@ def dumps(
`dumps()` serializes Python objects to JSON.

It natively serializes
`str`, `dict`, `list`, `tuple`, `int`, `float`, `bool`,
`str`, `dict`, `list`, `tuple`, `int`, `float`, `bool`, `None`,
`dataclasses.dataclass`, `typing.TypedDict`, `datetime.datetime`,
`datetime.date`, `datetime.time`, `uuid.UUID`, `numpy.ndarray`, and
`None` instances. It supports arbitrary types through `default`. It
`orjson.Fragment` instances. It supports arbitrary types through `default`. It
serializes subclasses of `str`, `int`, `dict`, `list`,
`dataclasses.dataclass`, and `enum.Enum`. It does not serialize subclasses
of `tuple` to avoid serializing `namedtuple` objects as arrays. To avoid
Expand Down Expand Up @@ -572,6 +573,30 @@ b'"1970-01-01T00:00:00+00:00"'
b'"1970-01-01T00:00:00Z"'
```

#### Fragment

`orjson.Fragment` includes already-serialized JSON in a document. This is an
efficient way to include JSON blobs from a cache, JSONB field, or separately
serialized object without first deserializing to Python objects via `loads()`.

```python
>>> import orjson
>>> orjson.dumps({"key": "zxc", "data": orjson.Fragment(b'{"a": "b", "c": 1}')})
b'{"key":"zxc","data":{"a": "b", "c": 1}}'
```

It does no reformatting: `orjson.OPT_INDENT_2` will not affect a
compact blob nor will a pretty-printed JSON blob be rewritten as compact.

The input must be `bytes` or `str` and given as a positional argument.

This raises `orjson.JSONEncodeError` if a `str` is given and the input is
not valid UTF-8. It otherwise does no validation and it is possible to
write invalid JSON. This does not escape characters. The implementation is
tested to not crash if given invalid strings or invalid JSON.

This is similar to `RawJSON` in rapidjson.

### Deserialize

```python
Expand Down
2 changes: 2 additions & 0 deletions integration/typestubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
import orjson

orjson.JSONDecodeError(msg="the_msg", doc="the_doc", pos=1)

orjson.dumps(orjson.Fragment(b"{}"))
22 changes: 14 additions & 8 deletions integration/wsgi.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

import lzma
import os
from datetime import datetime
from uuid import uuid4

from flask import Flask

import orjson

app = Flask(__name__)

filename = os.path.join(os.path.dirname(__file__), "..", "data", "twitter.json.xz")

with lzma.open(filename, "r") as fileh:
DATA = orjson.loads(fileh.read())
NOW = datetime.utcnow()


@app.route("/")
def root():
data = orjson.dumps(DATA)
data = {
"uuid": uuid4(),
"updated_at": NOW,
"data": [1, 2.2, None, True, False, orjson.Fragment(b"{}")],
}
payload = orjson.dumps(
data, option=orjson.OPT_NAIVE_UTC | orjson.OPT_OMIT_MICROSECONDS
)
return app.response_class(
response=data, status=200, mimetype="application/json; charset=utf-8"
response=payload,
status=200,
mimetype="application/json; charset=utf-8",
)
1 change: 1 addition & 0 deletions pysrc/orjson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
__all__ = (
"__version__",
"dumps",
"Fragment",
"JSONDecodeError",
"JSONEncodeError",
"loads",
Expand Down
3 changes: 3 additions & 0 deletions pysrc/orjson/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ def loads(__obj: Union[bytes, bytearray, memoryview, str]) -> Any: ...
class JSONDecodeError(json.JSONDecodeError): ...
class JSONEncodeError(TypeError): ...

class Fragment(tuple):
contents: Union[bytes, str]

OPT_APPEND_NEWLINE: int
OPT_INDENT_2: int
OPT_NAIVE_UTC: int
Expand Down
136 changes: 136 additions & 0 deletions src/ffi/fragment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use pyo3_ffi::*;
use std::os::raw::{c_char, c_ulong};
use std::ptr::null_mut;

// https://docs.python.org/3/c-api/typeobj.html#typedef-examples

/// In-memory representation of an `orjson.Fragment` instance.
///
/// `#[repr(C)]` keeps the fields in declaration order. Pointers to this struct
/// are cast to and from `*mut PyObject` elsewhere in this file, so the first
/// two fields stand in for the object header (reference count, then type).
#[repr(C)]
pub struct Fragment {
pub ob_refcnt: pyo3_ffi::Py_ssize_t,
pub ob_type: *mut pyo3_ffi::PyTypeObject,
// The single positional argument given to the constructor. The constructor
// takes a reference to it and the deallocator releases that reference.
pub contents: *mut pyo3_ffi::PyObject,
}

/// Sets a Python `TypeError` describing how `orjson.Fragment()` must be called.
///
/// Always returns a null pointer so a caller can hand it straight back to the
/// interpreter as the "exception raised" result.
#[cold]
#[inline(never)]
#[cfg_attr(feature = "optimize", optimize(size))]
fn raise_args_exception() -> *mut PyObject {
    let msg = "orjson.Fragment() takes exactly 1 positional argument";
    unsafe {
        let err_msg =
            PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize);
        // Building the message object can itself fail and yield NULL. Passing
        // NULL on to PyErr_SetObject/Py_DECREF would dereference it, so in that
        // case leave whatever error the failed allocation reported in place.
        if !err_msg.is_null() {
            PyErr_SetObject(PyExc_TypeError, err_msg);
            // PyErr_SetObject does not take over our reference; release it.
            Py_DECREF(err_msg);
        }
    }
    null_mut()
}

/// `tp_new` slot for `orjson.Fragment`.
///
/// Accepts exactly one positional argument and no keyword arguments. On any
/// other call shape a `TypeError` is set and null is returned. Otherwise a new
/// heap-allocated `Fragment` holding a reference to the argument is returned
/// with a reference count of 1.
///
/// The argument's type is not inspected here.
#[no_mangle]
#[cold]
#[cfg_attr(feature = "optimize", optimize(size))]
pub unsafe extern "C" fn orjson_fragment_tp_new(
    _subtype: *mut PyTypeObject,
    args: *mut PyObject,
    kwds: *mut PyObject,
) -> *mut PyObject {
    let exactly_one_positional = Py_SIZE(args) == 1 && kwds.is_null();
    if !exactly_one_positional {
        raise_args_exception();
        return null_mut();
    }

    // The tuple item is read without taking ownership; take our own reference
    // so the contents stay alive as long as the fragment does.
    let contents = PyTuple_GET_ITEM(args, 0);
    Py_INCREF(contents);

    let fragment = Fragment {
        ob_refcnt: 1,
        ob_type: crate::typeref::FRAGMENT_TYPE,
        contents,
    };
    // Ownership of the allocation passes to the returned raw pointer; it is
    // released again in `orjson_fragment_dealloc`.
    Box::into_raw(Box::new(fragment)) as *mut PyObject
}

/// `tp_dealloc` slot for `orjson.Fragment`.
///
/// Releases the reference held on the wrapped contents, then frees the
/// `Fragment` allocation itself using the layout of `Fragment`.
#[no_mangle]
#[cold]
#[cfg_attr(feature = "optimize", optimize(size))]
pub unsafe extern "C" fn orjson_fragment_dealloc(object: *mut PyObject) {
    let fragment = object as *mut Fragment;
    Py_DECREF((*fragment).contents);

    let layout = std::alloc::Layout::new::<Fragment>();
    std::alloc::dealloc(fragment as *mut u8, layout);
}

// Type flags used for `orjson.Fragment`. When the `Py_3_10` cfg is set the
// type is additionally flagged `Py_TPFLAGS_IMMUTABLETYPE`.
#[cfg(Py_3_10)]
const FRAGMENT_TP_FLAGS: c_ulong = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE;

// Without the `Py_3_10` cfg only the default flags are used.
#[cfg(not(Py_3_10))]
const FRAGMENT_TP_FLAGS: c_ulong = Py_TPFLAGS_DEFAULT;

/// Builds the `orjson.Fragment` type object and returns a raw pointer to it.
///
/// The `PyTypeObject` is allocated on the heap and released into a raw pointer,
/// so it is never freed by this function. Only the name, basic size, dealloc,
/// `tp_new` and flags slots carry non-empty values; every other slot listed is
/// null, `None` or zero. `PyType_Ready` is then called on the object.
///
/// NOTE(review): the return value of `PyType_Ready` is not checked, so a
/// failure there is not reported to the caller — confirm this is intended.
#[no_mangle]
#[cold]
#[cfg_attr(feature = "optimize", optimize(size))]
pub unsafe extern "C" fn orjson_fragmenttype_new() -> *mut PyTypeObject {
let ob = Box::new(PyTypeObject {
ob_base: PyVarObject {
ob_base: PyObject {
ob_refcnt: 0,
ob_type: std::ptr::addr_of_mut!(PyType_Type),
},
ob_size: 0,
},
// Explicitly NUL-terminated because the pointer is handed over as a C string.
tp_name: "orjson.Fragment\0".as_ptr() as *const c_char,
// Instances are exactly one `Fragment` struct, with no variable-length part.
tp_basicsize: std::mem::size_of::<Fragment>() as isize,
tp_itemsize: 0,
tp_dealloc: Some(orjson_fragment_dealloc),
tp_init: None,
tp_new: Some(orjson_fragment_tp_new),
tp_flags: FRAGMENT_TP_FLAGS,
// ...
tp_bases: null_mut(),
tp_cache: null_mut(),
tp_del: None,
tp_finalize: None,
tp_free: None,
tp_is_gc: None,
tp_mro: null_mut(),
tp_subclasses: null_mut(),
#[cfg(Py_3_8)]
tp_vectorcall: None,
tp_version_tag: 0,
tp_weaklist: null_mut(),
#[cfg(not(Py_3_9))]
tp_print: None,
#[cfg(Py_3_8)]
tp_vectorcall_offset: 0,
tp_getattr: None,
tp_setattr: None,
tp_as_async: null_mut(),
tp_repr: None,
tp_as_number: null_mut(),
tp_as_sequence: null_mut(),
tp_as_mapping: null_mut(),
tp_hash: None,
tp_call: None,
tp_str: None,
tp_getattro: None,
tp_setattro: None,
tp_as_buffer: null_mut(),
tp_doc: std::ptr::null_mut(),
tp_traverse: None,
tp_clear: None,
tp_richcompare: None,
tp_weaklistoffset: 0,
tp_iter: None,
tp_iternext: None,
tp_methods: null_mut(),
tp_members: null_mut(),
tp_getset: null_mut(),
tp_base: null_mut(),
tp_dict: null_mut(),
tp_descr_get: None,
tp_descr_set: None,
tp_dictoffset: 0,
tp_alloc: None,
});
// Give up Box ownership so the type object outlives this call.
let ob_ptr = Box::into_raw(ob);
PyType_Ready(ob_ptr);
ob_ptr
}
2 changes: 2 additions & 0 deletions src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
mod buffer;
mod bytes;
mod dict;
mod fragment;
mod list;

pub use buffer::*;
pub use bytes::*;
pub use dict::*;
pub use fragment::{orjson_fragmenttype_new, Fragment};
pub use list::PyListIter;
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ pub unsafe extern "C" fn orjson_init_exec(mptr: *mut PyObject) -> c_int {
add!(mptr, "loads\0", func);
}

add!(mptr, "Fragment\0", typeref::FRAGMENT_TYPE as *mut PyObject);

opt!(mptr, "OPT_APPEND_NEWLINE\0", opt::APPEND_NEWLINE);
opt!(mptr, "OPT_INDENT_2\0", opt::INDENT_2);
opt!(mptr, "OPT_NAIVE_UTC\0", opt::NAIVE_UTC);
Expand Down
1 change: 1 addition & 0 deletions src/serialize/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ impl DictNonStrKey {
| ObType::Dict
| ObType::List
| ObType::Dataclass
| ObType::Fragment
| ObType::Unknown => Err(SerializeError::DictKeyInvalidType),
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/serialize/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub enum SerializeError {
Integer53Bits,
Integer64Bits,
InvalidStr,
InvalidFragment,
KeyMustBeStr,
RecursionLimit,
TimeHasTzinfo,
Expand All @@ -33,6 +34,7 @@ impl std::fmt::Display for SerializeError {
SerializeError::Integer53Bits => write!(f, "Integer exceeds 53-bit range"),
SerializeError::Integer64Bits => write!(f, "Integer exceeds 64-bit range"),
SerializeError::InvalidStr => write!(f, "{}", INVALID_STR),
SerializeError::InvalidFragment => write!(f, "orjson.Fragment's content is not of type bytes or str"),
SerializeError::KeyMustBeStr => write!(f, "Dict key must be str"),
SerializeError::RecursionLimit => write!(f, "Recursion limit reached"),
SerializeError::TimeHasTzinfo => write!(f, "datetime.time must not have tzinfo set"),
Expand Down
50 changes: 50 additions & 0 deletions src/serialize/fragment.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

use crate::ffi::{Fragment, PyBytes_AS_STRING, PyBytes_GET_SIZE};
use crate::serialize::error::*;
use crate::str::unicode_to_str;
use crate::typeref::{BYTES_TYPE, STR_TYPE};

use serde::ser::{Serialize, Serializer};

/// Serializer wrapper around a raw pointer to an `orjson.Fragment` object.
///
/// `#[repr(transparent)]` gives it the same layout as the pointer it wraps.
#[repr(transparent)]
pub struct FragmentSerializer {
    ptr: *mut pyo3_ffi::PyObject,
}

impl FragmentSerializer {
    /// Wraps `ptr` without inspecting it. The `Serialize` impl later treats
    /// it as a pointer to a `Fragment`.
    pub fn new(ptr: *mut pyo3_ffi::PyObject) -> Self {
        Self { ptr }
    }
}

impl Serialize for FragmentSerializer {
    /// Passes the fragment's contents to the serializer as raw bytes.
    ///
    /// The contents are used as-is when their type is `BYTES_TYPE`. When their
    /// type is `STR_TYPE` they are first converted with `unicode_to_str`.
    ///
    /// # Errors
    ///
    /// * `SerializeError::InvalidStr` if `unicode_to_str` yields nothing for
    ///   `str` contents.
    /// * `SerializeError::InvalidFragment` if the contents' type is neither
    ///   `BYTES_TYPE` nor `STR_TYPE`.
    #[cold]
    #[inline(never)]
    #[cfg_attr(feature = "optimize", optimize(size))]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let payload: &[u8] = unsafe {
            // `self.ptr` is treated as a `Fragment`; read its wrapped object once.
            let contents = (*(self.ptr as *mut Fragment)).contents;
            let contents_type = ob_type!(contents);
            if contents_type == BYTES_TYPE {
                std::slice::from_raw_parts(
                    PyBytes_AS_STRING(contents) as *const u8,
                    PyBytes_GET_SIZE(contents) as usize,
                )
            } else if contents_type == STR_TYPE {
                match unicode_to_str(contents) {
                    Some(text) => text.as_bytes(),
                    None => err!(SerializeError::InvalidStr),
                }
            } else {
                err!(SerializeError::InvalidFragment)
            }
        };
        serializer.serialize_bytes(payload)
    }
}
6 changes: 4 additions & 2 deletions src/serialize/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ where
format_escaped_str(&mut self.writer, &mut self.formatter, value).map_err(Error::io)
}

fn serialize_bytes(self, _value: &[u8]) -> Result<()> {
unreachable!();
fn serialize_bytes(self, value: &[u8]) -> Result<()> {
self.writer.reserve(value.len());
unsafe { self.writer.write_reserved_fragment(value).unwrap() };
Ok(())
}

#[inline]
Expand Down
1 change: 1 addition & 0 deletions src/serialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ mod default;
mod dict;
mod error;
mod float;
mod fragment;
mod int;
mod json;
mod list;
Expand Down
Loading

0 comments on commit dcab26b

Please sign in to comment.