-
Notifications
You must be signed in to change notification settings - Fork 38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add c14n for node and document #138
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
//! Shared canonicalization logic and types. | ||
//! | ||
use std::ffi::c_int; | ||
|
||
use crate::bindings::{ | ||
xmlC14NMode_XML_C14N_1_0, xmlC14NMode_XML_C14N_1_1, xmlC14NMode_XML_C14N_EXCLUSIVE_1_0, | ||
}; | ||
|
||
/// Options for configuring how to canonicalize XML. | ||
/// | ||
/// Every field has a default through the derived `Default` implementation. | ||
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] | ||
pub struct CanonicalizationOptions { | ||
/// Canonicalization specification to use. | ||
pub mode: CanonicalizationMode, | ||
/// If true, keep `<!-- ... -->` comments in the output; otherwise remove them. | ||
pub with_comments: bool, | ||
/// Namespace prefixes to keep even if they are unused. By default, in [CanonicalizationMode::ExclusiveCanonical1_0], unused namespaces are removed. | ||
/// | ||
/// Doesn't apply to other canonicalization modes. | ||
pub inclusive_ns_prefixes: Vec<String>, | ||
} | ||
|
||
/// Canonicalization specification to use | ||
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had implemented this as follows: /// Canonicalization mode for [`Document.c14n`]
pub enum C14NMode {
/// [XML_C14N_1_0](https://www.w3.org/TR/2001/REC-xml-c14n-20010315)
Mode1_0,
/// [XML_C14N_1_1](https://www.w3.org/TR/xml-c14n11/)
Mode1_1,
/// [XML_C14N_EXCLUSIVE_1_0](https://www.w3.org/TR/xml-exc-c14n/)
ModeExclusive1_0(Vec<CString>),
}
impl C14NMode {
fn as_c_int(&self) -> c_int {
match self {
C14NMode::Mode1_0 => xmlC14NMode_XML_C14N_1_0 as c_int,
C14NMode::Mode1_1 => xmlC14NMode_XML_C14N_1_1 as c_int,
C14NMode::ModeExclusive1_0(_) => xmlC14NMode_XML_C14N_EXCLUSIVE_1_0 as c_int,
}
}
} It doesn't map the C API 1:1, but makes it clear the |
||
pub enum CanonicalizationMode { | ||
/// Original C14N 1.0 spec (converted to `xmlC14NMode_XML_C14N_1_0`). | ||
Canonical1_0, | ||
/// Exclusive C14N 1.0 spec (converted to `xmlC14NMode_XML_C14N_EXCLUSIVE_1_0`). | ||
/// This is the default mode. | ||
#[default] | ||
ExclusiveCanonical1_0, | ||
/// C14N 1.1 spec (converted to `xmlC14NMode_XML_C14N_1_1`). | ||
Canonical1_1, | ||
} | ||
|
||
impl From<CanonicalizationMode> for c_int { | ||
fn from(mode: CanonicalizationMode) -> Self { | ||
let c14n_mode = match mode { | ||
CanonicalizationMode::Canonical1_0 => xmlC14NMode_XML_C14N_1_0, | ||
CanonicalizationMode::ExclusiveCanonical1_0 => xmlC14NMode_XML_C14N_EXCLUSIVE_1_0, | ||
CanonicalizationMode::Canonical1_1 => xmlC14NMode_XML_C14N_1_1, | ||
}; | ||
|
||
c_int::from(c14n_mode as i32) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -343,3 +343,5 @@ impl Document { | |
Ok(()) | ||
} | ||
} | ||
|
||
mod c14n; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
//! Document canonicalization logic | ||
//! | ||
use std::ffi::{c_int, c_void, CString}; | ||
use std::os::raw; | ||
use std::ptr::null_mut; | ||
|
||
use crate::tree::c14n::*; | ||
|
||
use super::{ | ||
xmlAllocOutputBuffer, xmlC14NExecute, xmlC14NIsVisibleCallback, xmlChar, xmlNodePtr, | ||
xmlOutputBufferClose, xmlOutputBufferPtr, Document, | ||
}; | ||
|
||
impl Document { | ||
/// Canonicalize a document and return the results. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This docstring could use more information or a link to the libxml2 documentation (which isn't that great either). The callback parameter isn't obvious and could benefit from an example. |
||
pub fn canonicalize( | ||
&self, | ||
options: CanonicalizationOptions, | ||
callback: Option<(xmlNodePtr, xmlC14NIsVisibleCallback)>, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had implemented the callback functionality as follows: type IsVisibleCallback = Box<dyn Fn(&RoNode, &RoNode) -> bool>;
// thin pointer wrapper that calls the supplied callback instead
unsafe extern "C" fn _is_visible_wrapper(
data: *mut c_void,
node: xmlNodePtr,
parent: xmlNodePtr,
) -> c_int {
let callback = unsafe { &mut *(data as *mut IsVisibleCallback) };
// handling of parent nodes etc.
// …
(callback)(&RoNode(node), &RoNode(parent))) as c_int
}
impl Document{
/// Canonicalizes the XML document according to the W3C XML Canonicalization specification.
///
/// This method produces a canonical form of the XML document, which is useful for digital signatures
/// and document comparison. The canonicalization process ensures consistent representation of the XML content.
pub fn c14n(
&self,
mode: C14NMode,
with_comments: bool,
) -> Result<String, ()> {
self.c14n_with_visibility_callback(None, mode, with_comments)
}
/// Canonicalizes the document with an optional visibility callback
///
/// `is_visible_callback(node: &RoNode, parent: &RoNode)` is called for every
/// node having a parent, returning true if the node should be included in the
/// canonicalized output.
pub fn c14n_with_visibility_callback(
&self,
is_visible_callback: Option<IsVisibleCallback>,
mode: C14NMode,
with_comments: bool,
) -> Result<String, ()> {
// boxes the callback so it can be passed as a void pointer to [`_is_visible_wrapper`]
let (is_visible_fn, mut user_data) = match is_visible_callback {
Some(f) => (
Some(_is_visible_wrapper as unsafe extern "C" fn(_, _, _) -> _),
Some(Box::into_raw(f)),
),
None => (None, None),
};
let c14n_res = xmlC14NExecute(
self.doc_ptr(),
is_visible_fn,
user_data
.as_mut()
.map(|s| ptr::from_mut(s))
.unwrap_or(ptr::null_mut()) as *mut c_void,
mode.as_c_int(),
inclusive_ns_prefixes,
with_comments as c_int,
xmlOutputBufferCreateBuffer(buffer, ptr::null_mut()),
);
// …
}
} Usage looks like this: let input = r#"<ns1:root><ns2:foo x="1" a="2"/><!--cmt--><a/><b/></ns1:root>"#;
let callback = |_node: &RoNode, _parent: &RoNode| {
!(_parent.get_name() == "ns1:root" && _node.get_name() == "a")
};
let c14n_result = doc.c14n_with_visibility_callback(
Some(Box::new(callback)),
libxml::tree::document::C14NMode::Mode1_1,
false,
); It's a lot more flexible, but (I assume) also a lot more complex once the ancestor nodes are handled, probably as in your PR. |
||
) -> Result<String, ()> { | ||
let document = (*self.0).borrow().doc_ptr; | ||
|
||
let mut ns_list_c = to_xml_string_vec(options.inclusive_ns_prefixes); | ||
let inclusive_ns_prefixes = ns_list_c.as_mut_ptr(); | ||
let with_comments = c_int::from(options.with_comments); | ||
|
||
let (is_visible_callback, user_data) = if let Some((node_ptr, visibility_callback)) = callback { | ||
(visibility_callback, node_ptr as *mut _) | ||
} else { | ||
(None, null_mut()) | ||
}; | ||
|
||
let mode = options.mode.into(); | ||
unsafe { | ||
let c_obuf = create_output_buffer(); | ||
|
||
let status = xmlC14NExecute( | ||
document, | ||
is_visible_callback, | ||
user_data, | ||
mode, | ||
inclusive_ns_prefixes, | ||
with_comments, | ||
c_obuf, | ||
); | ||
|
||
let res = c_obuf_into_output(c_obuf); | ||
|
||
if status < 0 { | ||
Err(()) | ||
} else { | ||
Ok(res) | ||
} | ||
} | ||
} | ||
} | ||
|
||
unsafe fn c_obuf_into_output(c_obuf: xmlOutputBufferPtr) -> String { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My first approach looked like this: let buffer = xmlBufferCreate();
let c14n_res = xmlC14NExecute(
…,
xmlOutputBufferCreateBuffer(buffer, ptr::null_mut()),
);
let result = xmlBufferContent(buffer);
let c_string = CStr::from_ptr(result as *const c_char);
let node_string = c_string.to_string_lossy().into_owned();
xmlBufferFree(buffer);
Ok(node_string) It's shorter and needs fewer additional functions, but I'm probably missing some edge cases / needed error handling. |
||
let ctx_ptr = (*c_obuf).context; | ||
let output = Box::from_raw(ctx_ptr as *mut String); | ||
|
||
(*c_obuf).context = std::ptr::null_mut::<c_void>(); | ||
|
||
xmlOutputBufferClose(c_obuf); | ||
|
||
*output | ||
} | ||
|
||
unsafe fn create_output_buffer() -> xmlOutputBufferPtr { | ||
let output = String::new(); | ||
let ctx_ptr = Box::into_raw(Box::new(output)); | ||
let encoder = std::ptr::null_mut(); | ||
|
||
let buf = xmlAllocOutputBuffer(encoder); | ||
|
||
(*buf).writecallback = Some(xml_write_io); | ||
(*buf).closecallback = Some(xml_close_io); | ||
(*buf).context = ctx_ptr as _; | ||
|
||
buf | ||
} | ||
|
||
/// Close callback installed on the output buffer.
///
/// It ignores the context pointer, performs no work and always returns `0`.
unsafe extern "C" fn xml_close_io(_ctx: *mut raw::c_void) -> raw::c_int {
    const CLOSE_RESULT: raw::c_int = 0;
    CLOSE_RESULT
}
|
||
/// Write callback installed on the output buffer: appends `len` bytes from
/// `buffer` to the `String` that `io_ptr` points to.
///
/// The bytes are decoded with [`String::from_utf8_lossy`], so invalid UTF-8 is
/// replaced by U+FFFD rather than rejected.
///
/// Returns:
/// * `0` when `io_ptr` is null or `len` is zero (nothing is written),
/// * `-1` when `len` is negative or `buffer` is null while `len` is positive,
/// * `len` otherwise.
///
/// # Safety
///
/// `io_ptr` must be null or point to a live, exclusively accessible `String`.
/// `buffer` must be null or point to at least `len` readable bytes.
unsafe extern "C" fn xml_write_io(
    io_ptr: *mut raw::c_void,
    buffer: *const raw::c_char,
    len: raw::c_int,
) -> raw::c_int {
    if io_ptr.is_null() {
        return 0;
    }

    // A negative length must not be cast to usize: it would become a huge
    // slice length and read far out of bounds.
    let byte_count = match usize::try_from(len) {
        Ok(count) => count,
        Err(_) => return -1,
    };
    if byte_count == 0 {
        return 0;
    }
    if buffer.is_null() {
        return -1;
    }

    // The data is only read, so build a shared slice; creating a mutable
    // slice from a `*const` pointer is not justified here.
    let bytes = std::slice::from_raw_parts(buffer as *const u8, byte_count);

    // NOTE(review): each call is decoded on its own. If the caller ever splits
    // a multi-byte UTF-8 sequence across two writes, both halves would become
    // U+FFFD. Confirm against libxml2's flushing behavior.
    let output = &mut *(io_ptr as *mut String);
    output.push_str(&String::from_utf8_lossy(bytes));

    len
}
|
||
/// Create a [Vec] of null-terminated [*mut xmlChar] strings | ||
fn to_xml_string_vec(vec: Vec<String>) -> Vec<*mut xmlChar> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Depending on the audience it might be feasible to require the inclusive namespaces to be a |
||
vec | ||
.into_iter() | ||
.map(|s| CString::new(s).unwrap().into_raw() as *mut xmlChar) | ||
.chain(std::iter::once(std::ptr::null_mut())) | ||
.collect() | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
//! The tree functionality | ||
//! | ||
|
||
pub mod c14n; | ||
pub mod document; | ||
pub mod namespace; | ||
pub mod node; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
//! Node canonicalization logic | ||
//! | ||
use std::ffi::c_void; | ||
|
||
use crate::{ | ||
bindings::{xmlC14NIsVisibleCallback, xmlNodePtr}, | ||
c_helpers::xmlGetNodeType, | ||
tree::{c14n::*, Node}, | ||
}; | ||
|
||
use super::node_ancestors; | ||
|
||
impl Node { | ||
/// Canonicalize a document and return the results. | ||
pub fn canonicalize(&mut self, options: CanonicalizationOptions) -> Result<String, ()> { | ||
let doc_ref = self.get_docref().upgrade().unwrap(); | ||
let document = crate::tree::Document(doc_ref); | ||
|
||
let user_data = self.node_ptr_mut().unwrap(); | ||
let callback: xmlC14NIsVisibleCallback = Some(callback_wrapper); | ||
|
||
document.canonicalize(options, Some((user_data, callback))) | ||
} | ||
} | ||
|
||
unsafe extern "C" fn callback_wrapper( | ||
c14n_root_ptr: *mut c_void, | ||
node_ptr: xmlNodePtr, | ||
parent_ptr: xmlNodePtr, | ||
) -> ::std::os::raw::c_int { | ||
let c14n_root_ptr = c14n_root_ptr as xmlNodePtr; | ||
let node_type = xmlGetNodeType(node_ptr); | ||
|
||
let tn_ptr = if NODE_TYPES.contains(&node_type) { | ||
node_ptr | ||
} else { | ||
parent_ptr | ||
}; | ||
|
||
let tn_ancestors = node_ancestors(tn_ptr); | ||
|
||
let ret = (tn_ptr == c14n_root_ptr) || tn_ancestors.contains(&c14n_root_ptr); | ||
if ret { | ||
1 | ||
} else { | ||
0 | ||
} | ||
} | ||
|
||
/// Node types for which `callback_wrapper` tests the node itself against the | ||
/// canonicalization root; for any other node type it tests the parent instead. | ||
const NODE_TYPES: [u32; 7] = [ | ||
super::xmlElementType_XML_ELEMENT_NODE, | ||
super::xmlElementType_XML_ATTRIBUTE_NODE, | ||
super::xmlElementType_XML_DOCUMENT_TYPE_NODE, | ||
super::xmlElementType_XML_TEXT_NODE, | ||
super::xmlElementType_XML_DTD_NODE, | ||
super::xmlElementType_XML_PI_NODE, | ||
super::xmlElementType_XML_COMMENT_NODE, | ||
]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With
inclusive_ns_prefixes
as part of the CanonicalizationMode
(see below), there are only two options left. For those, I'd rather have them as arguments to the functions instead of having an options struct.