-
Notifications
You must be signed in to change notification settings - Fork 147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Disable url encodings in myhtml_attribute_add? #169
Comments
Hi @searene #include <myhtml/myhtml.h>
#include <myhtml/serialization.h>
int main(int argc, const char * argv[])
{
char html[] = "<img/>";
// basic init
myhtml_t* myhtml = myhtml_create();
myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
// init tree
myhtml_tree_t* tree = myhtml_tree_create();
myhtml_tree_init(tree, myhtml);
// parse html
myhtml_parse_fragment(tree, MyENCODING_UTF_8, html, strlen(html), MyHTML_TAG_DIV, MyHTML_NAMESPACE_HTML);
// get first img from index
myhtml_collection_t *div_list = myhtml_get_nodes_by_name(tree, NULL, "img", 3, NULL);
myhtml_tree_node_t *node = div_list->list[0];
// add an attr
const char* srcValue = "custom_protocol://resource?id=1&name=apple";
myhtml_attribute_add(node, "src", 3, srcValue, strlen(srcValue), MyENCODING_UTF_8);
printf("Value is: %s\n", myhtml_node_attribute_first(node)->value.data);
mycore_string_raw_t str = {0};
myhtml_serialization_tree_buffer(myhtml_tree_get_document(tree), &str);
printf("Serialization is: %s\n", str.data);
// release resources
myhtml_collection_destroy(div_list);
myhtml_tree_destroy(tree);
myhtml_destroy(myhtml);
return 0;
} I fix it tomorrow. I highly recommend that you use a lexbor HTML parser. This is a modern and very fast parser (like a myhtml). #include <lexbor/html/parser.h>
#include <lexbor/html/serialize.h>
/*
 * Report a fatal error: write the printf-style message followed by a newline
 * to stderr, then terminate the process with EXIT_FAILURE.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define FAILED(...)                                                           \
    do {                                                                      \
        fprintf(stderr, __VA_ARGS__);                                         \
        fputc('\n', stderr);                                                  \
        exit(EXIT_FAILURE);                                                   \
    } while (0)
/*
 * Parse "<img>", add a "src" attribute whose value contains '&' to the IMG
 * element, and print the pretty-serialized document to stdout.
 *
 * Every failure is reported through FAILED(), which terminates the process;
 * on success the collection and the document are destroyed and 0 is returned.
 */
int
main(int argc, const char *argv[])
{
    (void) argc;
    (void) argv;

    lxb_status_t status;
    lxb_html_body_element_t *body;
    lxb_html_document_t *document;
    lxb_dom_collection_t *collection;
    lxb_dom_element_t *element;
    lxb_dom_attr_t *attr;

    static const lxb_char_t html[] = "<img>";
    size_t html_len = sizeof(html) - 1;

    /* Same compile-time length pattern as html: no strlen, no pointer casts. */
    static const lxb_char_t src_value[] = "custom_protocol://resource?id=1&name=apple";
    size_t src_value_len = sizeof(src_value) - 1;

    /* Initialization */
    document = lxb_html_document_create();
    if (document == NULL) {
        FAILED("Failed to create HTML Document");
    }

    /* Parse HTML */
    status = lxb_html_document_parse(document, html, html_len);
    if (status != LXB_STATUS_OK) {
        FAILED("Failed to parse HTML");
    }

    /* Create Collection for elements */
    collection = lxb_dom_collection_make(&document->dom_document, 16);
    if (collection == NULL) {
        FAILED("Failed to create collection");
    }

    /* Get BODY element (root for search) */
    body = lxb_html_document_body_element(document);
    if (body == NULL) {
        FAILED("Failed to get BODY element");
    }
    element = lxb_dom_interface_element(body);

    /* Find IMG element */
    status = lxb_dom_elements_by_tag_name(element, collection,
                                          (const lxb_char_t *) "img", 3);
    if (status != LXB_STATUS_OK || lxb_dom_collection_length(collection) == 0) {
        FAILED("Failed to find IMG element");
    }

    /* Append new attribute */
    element = lxb_dom_collection_element(collection, 0);
    attr = lxb_dom_element_set_attribute(element, (const lxb_char_t *) "src", 3,
                                         src_value, src_value_len);
    if (attr == NULL) {
        FAILED("Failed to create and append new attribute");
    }

    /* Print Result */
    lexbor_str_t str = {0};
    status = lxb_html_serialize_pretty_tree_str(lxb_dom_interface_node(document),
                                                LXB_HTML_SERIALIZE_OPT_UNDEF, 0,
                                                &str);
    if (status != LXB_STATUS_OK) {
        FAILED("Failed to serialization HTML tree");
    }

    /* str.data is lxb_char_t *; cast so the argument matches %s. */
    printf("%s\n", (const char *) str.data);

    lxb_dom_collection_destroy(collection, true);
    lxb_html_document_destroy(document);

    return 0;
} |
@lexborisov Thanks for the quick reply! Please keep us updated in this issue when you fix this problem, I appreciate it! Regarding |
@lexborisov does lexbor support css selectors? |
I want to disable the automatically applied URL encodings in `myhtml_attribute_add`. To be more specific, please check the following example: The output is as follows:
As you can see, the `&` between `id=1` and `name=apple` was automatically URL-encoded, which is not what I want. I want it to be the same as the value I passed in; in other words, I want the following result: Is there any way to do this?
The text was updated successfully, but these errors were encountered: