On Wed, 2009-03-04 at 18:53 +0800, Bo Yang wrote:
I write the patch to make the libDOM hubbub compile again. But I
found jmb has done that. And I modify my patch on the newest SVN code.
In addition to make the hubbub binding compile, the patch also
implement most of the tree_handler callbacks.
Thanks for this. Comments below.
diff --git a/bindings/hubbub/parser.c b/bindings/hubbub/parser.c
index 7b5e6ab..5a0366d 100644
--- a/bindings/hubbub/parser.c
+++ b/bindings/hubbub/parser.c
@@ -16,10 +16,11 @@
#include "utils.h"
/**
- * libdom Hubbub parser object
+ * libdom Hubbub parser context
*/
struct dom_hubbub_parser {
hubbub_parser *parser; /**< Hubbub parser instance */
+ hubbub_tree_handler tree_handler;
This field needs documenting.
/**
- * Create a Hubbub parser instance
- *
- * \param aliases Path to encoding alias mapping file
- * \param enc Source charset, or NULL
- * \param int_enc Desired charset of document buffer (UTF-8 or
UTF-16)
- * \param alloc Memory (de)allocation function
- * \param pw Pointer to client-specific private data
- * \param msg Informational message function
- * \param mctx Pointer to client-specific private data
- * \return Pointer to instance, or NULL on memory exhaustion
+ * Create the hubbub parsing context
*/
-dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
- const char *enc, const char *int_enc,
+dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
+ const char *enc, bool fix_enc,
dom_alloc alloc, void *pw, dom_msg msg, void *mctx)
Why have you removed the API documentation?
@@ -115,141 +140,510 @@ dom_hubbub_parser
*dom_hubbub_parser_create(const char *aliases,
return NULL;
}
- /* Now, try to get an appropriate implementation from the
registry */
Why has this comment been removed?
err = dom_implregistry_get_dom_implementation(features,
&parser->impl, alloc, pw);
if (err != DOM_NO_ERR) {
- dom_string_unref(features);
- hubbub_parser_destroy(parser->parser);
- alloc(parser, 0, pw);
+ parser->alloc(parser, 0, parser->pw);
You're leaking both the features string and the hubbub_parser object
here.
msg(DOM_MSG_ERROR, mctx, "No suitable
DOMImplementation");
return NULL;
}
- /* no longer need the features string */
- dom_string_unref(features);
Similarly, this needs reinstating, otherwise you're leaking the features
string.
+ err = dom_implementation_create_document(parser->impl,
NULL,
NULL, NULL,
+ &parser->doc, alloc, pw);
+ if (err != DOM_NO_ERR) {
You need to clean up the hubbub_parser here, too.
+ parser->alloc(parser, 0, parser->pw);
+ msg(DOM_MSG_ERROR, mctx, "Can't create DOM document");
+ return NULL;
+ }
-/* Destroy a Hubbub parser instance */
Why has this comment been removed?
void dom_hubbub_parser_destroy(dom_hubbub_parser *parser)
{
dom_implementation_unref(parser->impl);
-
hubbub_parser_destroy(parser->parser);
+ parser->parser = NULL;
- /** \todo do we want to clean up the document here too? */
-
+ dom_node_unref((struct dom_node *) parser->doc);
+ parser->doc = NULL;
Although this is the correct approach, it's not quite right. If the
client has called dom_hubbub_parser_get_document() before destroying the
parser (a not unlikely event), then you're destroying the document from
under the client.
A better solution would be to invalidate the doc pointer in
dom_hubbub_parser_get_document() and then only destroy the document here
if the pointer to it is non-NULL.
-/* Notify parser that datastream is empty */
dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser
*parser)
{
hubbub_error err;
+ parser->complete = true;
err = hubbub_parser_completed(parser->parser);
- if (err != DOM_HUBBUB_OK) {
- parser->msg(DOM_MSG_ERROR, parser->mctx,
- "hubbub_parser_completed failed: %d",
err);
- return DOM_HUBBUB_HUBBUB_ERR | err;
+
+ return DOM_HUBBUB_OK;
+}
Why has the error checking been removed here?
+struct dom_document
*dom_hubbub_parser_get_document(dom_hubbub_parser
*parser)
+{
+ return parser->doc;
+}
The existing implementation only returns the document when parsing has
completed, otherwise it returns NULL. Should this not do the same? Also
see my comments about destroying the document in
dom_hubbub_parser_destroy().
+/* The callbacks definitions */
+static int create_comment(void *parser, const hubbub_string *data,
void **result)
+{
+ dom_hubbub_parser * dom_parser = (dom_hubbub_parser *) parser;
Should be *dom_parser (i.e. lose the space). This occurs in all the
callbacks, so needs fixing in the others, too.
+static int create_doctype(void *parser, const hubbub_doctype
*doctype,
+ void **result)
[...]
+ if (!doctype->public_missing) {
Use if (doctype->public_missing == false) { -- it's less prone to
misreading.
+ if (!doctype->system_missing) {
Same here.
+clean3:
+ if (!doctype->system_missing)
+ dom_string_unref(system_id);
+
+clean2:
+ if (!doctype->public_missing)
+ dom_string_unref(public_id);
And these two, as well :)
+static int create_text(void *parser, const hubbub_string *data,
void
**result)
{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *str;
+ struct dom_text *text = NULL;
+
+ *result = NULL;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
data->ptr,
+ data->len, &str);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create text '%s'",
data->ptr);
The text is not \0 terminated, so you want %.*s'", data->len, data->ptr.
+static int ref_node(void *parser, void *node)
+{
+ UNUSED(parser);
+ struct dom_node *dnode = (struct dom_node *) node;
+ dom_node_ref(dnode);
+
+ return 0;
+}
+
+static int unref_node(void *parser, void *node)
+{
+ UNUSED(parser);
+ struct dom_node *dnode = (struct dom_node *) node;
+ dom_node_unref(dnode);
+
+ return 0;
+}
+
+static int append_child(void *parser, void *parent, void *child, void
**result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_append_child((struct dom_node *)parent,
+ (struct dom_node *)child, (struct dom_node
**)result);
Please put spaces after casts (i.e. (struct dom_node *) parent).
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't append child '%d' for parent
'%
d'",
+ child, parent);
You can't use %d for pointers. Use %p, instead.
+static int insert_before(void *parser, void *parent, void *child,
void *ref_child,
+ void **result)
+{
[...]
+ dom_parser->msg(DOM_MSG_CRITICAL,
dom_parser->mctx,
+ "Can't insert node '%d' before node
'%
d'",
+ child, ref_child);
Same here.
+static int remove_child(void *parser, void *parent, void *child,
void
**result)
+{
[...]
+ dom_parser->msg(DOM_MSG_CRITICAL,
dom_parser->mctx,
+ "Can't remove child '%d'", child);
And here.
+static int clone_node(void *parser, void *node, bool deep, void
**result)
[...]
+ dom_parser->msg(DOM_MSG_CRITICAL,
dom_parser->mctx,
+ "Can't clone node '%d'", node);
And here.
+static int reparent_children(void *parser, void *node, void
*new_parent)
+{
This is wrong. This function must move all children of "node" into
"new_parent", not move "node" into "new_parent".
+static int get_parent(void *parser, void *node, bool element_only,
void **result)
+{
This is also wrong. It simply wants to retrieve the parent of "node" and
return it. If "element_only" is true, then it will only return the
parent of "node" if the parent is a DOM_ELEMENT_NODE.
+static int has_children(void *parser, void *node, bool *result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ UNUSED(parser);
+ err = dom_node_has_child_nodes((struct dom_node *)node,
result);
(struct dom_node *) node.
+static int add_attributes(void *parser, void *node,
+ const hubbub_attribute *attributes, uint32_t
n_attributes)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ uint32_t i;
+
+ for (i=0; i<n_attributes; i++) {
for (i = 0; i < n_attributes; i++) {
+static int change_encoding(void *parser, const char *charset)
+{
+ UNUSED(parser);
+ UNUSED(charset);
+
+ return 0;
}
This will need implementing. Otherwise, when we return from
dom_hubbub_parser_parse_chunk() with the ENCODINGCHANGE return code, the
new encoding is not known. We also need some API to allow the client to
find out the new encoding name.
To be completely useful, both the form_associate and set_quirks
callbacks will need implementing. Though they both need some thought as
to the best approach. Leaving them unimplemented is fine for now.
diff --git a/src/utils/namespace.c b/src/utils/namespace.c
index 8002b8e..8144ac5 100644
--- a/src/utils/namespace.c
+++ b/src/utils/namespace.c
@@ -12,6 +12,7 @@
#include "utils/namespace.h"
#include "utils/utils.h"
+
/** XML prefix */
static struct dom_string *xml;
/** XML namespace URI */
@@ -21,6 +22,21 @@ static struct dom_string *xmlns;
/** XMLNS namespace URI */
static struct dom_string *xmlns_ns;
+/** The namespaces strings */
+static const char *namespaces[] = {
+ NULL,
+ "http://www.w3.org/1999/xhtml",
+ "math:http://www.w3.org/1998/Math/MathML",
+ "svg:http://www.w3.org/2000/svg",
+ "xlink:http://www.w3.org/1999/xlink",
+ "xml:http://www.w3.org/XML/1998/namespace",
+ "xmlns:http://www.w3.org/2000/xmlns/"
+};
Why do the last 5 of these have prefixes? They should just be the URI.
+/** Maybe use the two same number is ugly */
+const int namespace_num = 7;
+struct dom_string *dom_namespaces[7];
Using (sizeof(namespaces) / sizeof(namespaces[0])) may be less prone to
future problems.
/**
* Initialise the namespace component
*
@@ -66,6 +82,16 @@ dom_exception _dom_namespace_initialise(dom_alloc
alloc, void *pw)
return err;
}
+ int i;
+
+ for (i=0; i<namespace_num; i++) {
+ err = dom_string_create(
+ alloc, pw, (const uint8_t
*)namespaces[i],
+ strlen(namespaces[i]),
&dom_namespaces[i]);
+ if (err != DOM_NO_ERR)
+ return err;
+ }
+
return DOM_NO_ERR;
}
@@ -81,6 +107,11 @@ dom_exception _dom_namespace_finalise(void)
dom_string_unref(xml_ns);
dom_string_unref(xml);
+ int i;
+ for (i=0; i<namespace_num; i++) {
+ dom_string_unref(dom_namespaces[i]);
+ }
If initialisation failed, then some of the pointers may be NULL. You
probably want to ensure that isn't the case before unreffing them.
J.