Gitweb links:
...log
http://git.netsurf-browser.org/netsurf.git/shortlog/0c25ae5e8eb38b3b888a9...
...commit
http://git.netsurf-browser.org/netsurf.git/commit/0c25ae5e8eb38b3b888a9bf...
...tree
http://git.netsurf-browser.org/netsurf.git/tree/0c25ae5e8eb38b3b888a9bfdc...
The branch, master has been updated
via 0c25ae5e8eb38b3b888a9bfdc0fcc6d53af17b04 (commit)
via 1d82ef411a65095f218c15441fb804715e59f0eb (commit)
via 6780766fb7c27145415baa2c40251e386b3e894a (commit)
from 3d739479ea760b948a95edf759d0dc080e26b035 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff
http://git.netsurf-browser.org/netsurf.git/commit/?id=0c25ae5e8eb38b3b888...
commit 0c25ae5e8eb38b3b888a9bfdc0fcc6d53af17b04
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
remove the ambiguity around the reallocation in utf8_to_html()
diff --git a/utils/utf8.c b/utils/utf8.c
index 7aa7d93..84918cc 100644
--- a/utils/utf8.c
+++ b/utils/utf8.c
@@ -365,11 +365,11 @@ utf8_convert_html_chunk(iconv_t cd,
/* exported interface documented in utils/utf8.h */
nserror
-utf8_to_html(const char *string, const char *encname, size_t len, char **result)
+utf8_to_html(const char *string, const char *encname, size_t len, char **result_out)
{
iconv_t cd;
const char *in;
- char *out, *origout;
+ char *out, *origout, *result;
size_t off, prev_off, inlen, outlen, origoutlen, esclen;
nserror ret;
char *pescape, escape[11];
@@ -452,11 +452,12 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
outlen -= 4;
/* Shrink-wrap */
- *result = realloc(origout, origoutlen - outlen);
- if (*result == NULL) {
+ result = realloc(origout, origoutlen - outlen);
+ if (result == NULL) {
free(origout);
return NSERROR_NOMEM;
}
+ *result_out = result;
return NSERROR_OK;
}
commitdiff
http://git.netsurf-browser.org/netsurf.git/commit/?id=1d82ef411a65095f218...
commit 1d82ef411a65095f218c15441fb804715e59f0eb
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
consolodate duplicated conversion descriptor cache code
diff --git a/utils/utf8.c b/utils/utf8.c
index 84cacd7..7aa7d93 100644
--- a/utils/utf8.c
+++ b/utils/utf8.c
@@ -44,7 +44,7 @@ uint32_t utf8_to_ucs4(const char *s_in, size_t l)
parserutils_error perror;
perror = parserutils_charset_utf8_to_ucs4((const uint8_t *) s_in, l,
- &ucs4, &len);
+ &ucs4, &len);
if (perror != PARSERUTILS_OK)
ucs4 = 0xfffd;
@@ -106,7 +106,7 @@ size_t utf8_char_byte_length(const char *s)
parserutils_error perror;
perror = parserutils_charset_utf8_char_byte_length((const uint8_t *) s,
- &len);
+ &len);
assert(perror == PARSERUTILS_OK);
return len;
@@ -131,7 +131,7 @@ size_t utf8_next(const char *s, size_t l, size_t o)
parserutils_error perror;
perror = parserutils_charset_utf8_next((const uint8_t *) s, l, o,
- &next);
+ &next);
assert(perror == PARSERUTILS_OK);
return next;
@@ -151,6 +151,47 @@ static inline void utf8_clear_cd_cache(void)
last_cd.cd = 0;
}
+/**
+ * obtain a cached conversion descriptor
+ *
+ * either return the cached conversion descriptor or create one if required
+ */
+static nserror
+get_cached_cd(const char *enc_from, const char *enc_to, iconv_t *cd_out)
+{
+ iconv_t cd;
+ /* we cache the last used conversion descriptor,
+ * so check if we're trying to use it here */
+ if (strncasecmp(last_cd.from, enc_from, sizeof(last_cd.from)) == 0 &&
+ strncasecmp(last_cd.to, enc_to, sizeof(last_cd.to)) == 0 &&
+ last_cd.cd != 0) {
+ *cd_out = last_cd.cd;
+ return NSERROR_OK;
+ }
+
+ /* no match, so create a new cd */
+ cd = iconv_open(enc_to, enc_from);
+ if (cd == (iconv_t) -1) {
+ if (errno == EINVAL) {
+ return NSERROR_BAD_ENCODING;
+ }
+ /* default to no memory */
+ return NSERROR_NOMEM;
+ }
+
+ /* close the last cd - we don't care if this fails */
+ if (last_cd.cd) {
+ iconv_close(last_cd.cd);
+ }
+
+ /* and safely copy the to/from/cd data into last_cd */
+ snprintf(last_cd.from, sizeof(last_cd.from), enc_from);
+ snprintf(last_cd.to, sizeof(last_cd.to), "%s", enc_to);
+ *cd_out = last_cd.cd = cd;
+
+ return NSERROR_OK;
+}
+
/* exported interface documented in utils/utf8.h */
nserror utf8_finalise(void)
{
@@ -187,6 +228,7 @@ utf8_convert(const char *string,
iconv_t cd;
char *temp, *out, *in, *result;
size_t result_len;
+ nserror res;
assert(string && from && to && result_out);
@@ -215,29 +257,9 @@ utf8_convert(const char *string,
in = (char *)string;
- /* we cache the last used conversion descriptor,
- * so check if we're trying to use it here */
- if (strncasecmp(last_cd.from, from, sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) {
- cd = last_cd.cd;
- } else {
- /* no match, so create a new cd */
- cd = iconv_open(to, from);
- if (cd == (iconv_t)-1) {
- if (errno == EINVAL)
- return NSERROR_BAD_ENCODING;
- /* default to no memory */
- return NSERROR_NOMEM;
- }
-
- /* close the last cd - we don't care if this fails */
- if (last_cd.cd)
- iconv_close(last_cd.cd);
-
- /* and copy the to/from/cd data into last_cd */
- snprintf(last_cd.from, sizeof(last_cd.from), "%s", from);
- snprintf(last_cd.to, sizeof(last_cd.to), "%s", to);
- last_cd.cd = cd;
+ res = get_cached_cd(from, to, &cd);
+ if (res != NSERROR_OK) {
+ return res;
}
/* Worst case = ASCII -> UCS4, so allocate an output buffer
@@ -289,14 +311,14 @@ utf8_convert(const char *string,
/* exported interface documented in utils/utf8.h */
nserror utf8_to_enc(const char *string, const char *encname,
- size_t len, char **result)
+ size_t len, char **result)
{
return utf8_convert(string, len, "UTF-8", encname, result, NULL);
}
/* exported interface documented in utils/utf8.h */
nserror utf8_from_enc(const char *string, const char *encname,
- size_t len, char **result, size_t *result_len)
+ size_t len, char **result, size_t *result_len)
{
return utf8_convert(string, len, encname, "UTF-8", result, result_len);
}
@@ -327,7 +349,7 @@ utf8_convert_html_chunk(iconv_t cd,
esclen = snprintf(escape, sizeof(escape), "&#x%06x;", ucs4);
pescape = escape;
ret = iconv(cd, (void *) &pescape, &esclen,
- (void *) out, outlen);
+ (void *) out, outlen);
if (ret == (size_t) -1)
return NSERROR_NOMEM;
@@ -339,6 +361,8 @@ utf8_convert_html_chunk(iconv_t cd,
return NSERROR_OK;
}
+
+
/* exported interface documented in utils/utf8.h */
nserror
utf8_to_html(const char *string, const char *encname, size_t len, char **result)
@@ -349,35 +373,14 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
size_t off, prev_off, inlen, outlen, origoutlen, esclen;
nserror ret;
char *pescape, escape[11];
+ nserror res;
if (len == 0)
len = strlen(string);
- /* we cache the last used conversion descriptor,
- * so check if we're trying to use it here */
- if (strncasecmp(last_cd.from, "UTF-8", sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, encname,
- sizeof(last_cd.to)) == 0 &&
- last_cd.cd != 0) {
- cd = last_cd.cd;
- } else {
- /* no match, so create a new cd */
- cd = iconv_open(encname, "UTF-8");
- if (cd == (iconv_t) -1) {
- if (errno == EINVAL)
- return NSERROR_BAD_ENCODING;
- /* default to no memory */
- return NSERROR_NOMEM;
- }
-
- /* close the last cd - we don't care if this fails */
- if (last_cd.cd)
- iconv_close(last_cd.cd);
-
- /* and safely copy the to/from/cd data into last_cd */
- snprintf(last_cd.from, sizeof(last_cd.from), "UTF-8");
- snprintf(last_cd.to, sizeof(last_cd.to), "%s", encname);
- last_cd.cd = cd;
+ res = get_cached_cd("UTF-8", encname, &cd);
+ if (res != NSERROR_OK) {
+ return res;
}
/* Worst case is ASCII -> UCS4, with all characters escaped:
@@ -397,13 +400,13 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
while (off < len) {
/* Must escape '&', '<', and '>' */
if (string[off] == '&' || string[off] == '<' ||
- string[off] == '>') {
+ string[off] == '>') {
if (off - prev_off > 0) {
/* Emit chunk */
in = string + prev_off;
inlen = off - prev_off;
ret = utf8_convert_html_chunk(cd, in, inlen,
- &out, &outlen);
+ &out, &outlen);
if (ret != NSERROR_OK) {
free(origout);
iconv_close(cd);
@@ -414,10 +417,10 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
/* Emit mandatory escape */
esclen = snprintf(escape, sizeof(escape),
- "&#x%06x;", string[off]);
+ "&#x%06x;", string[off]);
pescape = escape;
ret = utf8_convert_html_chunk(cd, pescape, esclen,
- &out, &outlen);
+ &out, &outlen);
if (ret != NSERROR_OK) {
free(origout);
iconv_close(cd);
commitdiff
http://git.netsurf-browser.org/netsurf.git/commit/?id=6780766fb7c27145415...
commit 6780766fb7c27145415baa2c40251e386b3e894a
Author: Vincent Sanders <vince(a)kyllikki.org>
Commit: Vincent Sanders <vince(a)kyllikki.org>
Improve utf8 conversion function
Newer compilers were (correctly) pointing out use after free.
Slightly reworkeed conversion function to remove compiler warnings
and clean up implementation.
diff --git a/utils/utf8.c b/utils/utf8.c
index f0ac0c9..84cacd7 100644
--- a/utils/utf8.c
+++ b/utils/utf8.c
@@ -168,49 +168,47 @@ nserror utf8_finalise(void)
* Convert a string from one encoding to another
*
* \param string The NULL-terminated string to convert
- * \param len Length of input string to consider (in bytes), or 0
+ * \param slen Length of input string to consider (in bytes), or 0
* \param from The encoding name to convert from
* \param to The encoding name to convert to
- * \param result Pointer to location in which to store result.
- * \param result_len Pointer to location in which to store result length.
+ * \param result_out Pointer to location in which to store result.
+ * \param result_len_out Pointer to location in which to store result length.
* \return NSERROR_OK for no error, NSERROR_NOMEM on allocation error,
* NSERROR_BAD_ENCODING for a bad character encoding
*/
static nserror
utf8_convert(const char *string,
- size_t len,
+ size_t slen,
const char *from,
const char *to,
- char **result,
- size_t *result_len)
+ char **result_out,
+ size_t *result_len_out)
{
iconv_t cd;
- char *temp, *out, *in;
- size_t slen, rlen;
-
- assert(string && from && to && result);
-
- if (string[0] == '\0') {
- /* On AmigaOS, iconv() returns an error if we pass an
- * empty string. This prevents iconv() being called as
- * there is no conversion necessary anyway. */
- *result = strdup("");
- if (!(*result)) {
- *result = NULL;
- return NSERROR_NOMEM;
- }
+ char *temp, *out, *in, *result;
+ size_t result_len;
- return NSERROR_OK;
+ assert(string && from && to && result_out);
+
+ /* calculate the source length if not given */
+ if (slen==0) {
+ slen = strlen(string);
}
- if (strcasecmp(from, to) == 0) {
- /* conversion from an encoding to itself == strdup */
- slen = len ? len : strlen(string);
- *(result) = strndup(string, slen);
- if (!(*result)) {
- *(result) = NULL;
+ /* process the empty string separately avoiding any conversion
+ * check for the source and destination encoding being the same
+ *
+ * This optimisation is necessary on AmigaOS as iconv()
+ * returns an error if an empty string is passed.
+ */
+ if ((slen == 0) || (strcasecmp(from, to) == 0)) {
+ *result_out = strndup(string, slen);
+ if (*result_out == NULL) {
return NSERROR_NOMEM;
}
+ if (result_len_out != NULL) {
+ *result_len_out = slen;
+ }
return NSERROR_OK;
}
@@ -220,10 +218,9 @@ utf8_convert(const char *string,
/* we cache the last used conversion descriptor,
* so check if we're trying to use it here */
if (strncasecmp(last_cd.from, from, sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) {
+ strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) {
cd = last_cd.cd;
- }
- else {
+ } else {
/* no match, so create a new cd */
cd = iconv_open(to, from);
if (cd == (iconv_t)-1) {
@@ -243,20 +240,19 @@ utf8_convert(const char *string,
last_cd.cd = cd;
}
- slen = len ? len : strlen(string);
/* Worst case = ASCII -> UCS4, so allocate an output buffer
* 4 times larger than the input buffer, and add 4 bytes at
* the end for the NULL terminator
*/
- rlen = slen * 4 + 4;
+ result_len = slen * 4 + 4;
- temp = out = malloc(rlen);
+ temp = out = malloc(result_len);
if (!out) {
return NSERROR_NOMEM;
}
/* perform conversion */
- if (iconv(cd, (void *) &in, &slen, &out, &rlen) == (size_t)-1) {
+ if (iconv(cd, (void *) &in, &slen, &out, &result_len) == (size_t)-1) {
free(temp);
/* clear the cached conversion descriptor as it's invalid */
if (last_cd.cd)
@@ -270,19 +266,22 @@ utf8_convert(const char *string,
return NSERROR_NOMEM;
}
- *(result) = realloc(temp, out - temp + 4);
- if (!(*result)) {
+ result_len = out - temp;
+
+ /* resize buffer allowing for null termination */
+ result = realloc(temp, result_len + 4);
+ if (result == NULL) {
free(temp);
- *(result) = NULL; /* for sanity's sake */
return NSERROR_NOMEM;
}
/* NULL terminate - needs 4 characters as we may have
* converted to UTF-32 */
- memset((*result) + (out - temp), 0, 4);
+ memset(result + result_len, 0, 4);
- if (result_len != NULL) {
- *result_len = (out - temp);
+ *result_out = result;
+ if (result_len_out != NULL) {
+ *result_len_out = result_len;
}
return NSERROR_OK;
-----------------------------------------------------------------------
Summary of changes:
utils/utf8.c | 203 +++++++++++++++++++++++++++++-----------------------------
1 file changed, 103 insertions(+), 100 deletions(-)
diff --git a/utils/utf8.c b/utils/utf8.c
index f0ac0c9..84918cc 100644
--- a/utils/utf8.c
+++ b/utils/utf8.c
@@ -44,7 +44,7 @@ uint32_t utf8_to_ucs4(const char *s_in, size_t l)
parserutils_error perror;
perror = parserutils_charset_utf8_to_ucs4((const uint8_t *) s_in, l,
- &ucs4, &len);
+ &ucs4, &len);
if (perror != PARSERUTILS_OK)
ucs4 = 0xfffd;
@@ -106,7 +106,7 @@ size_t utf8_char_byte_length(const char *s)
parserutils_error perror;
perror = parserutils_charset_utf8_char_byte_length((const uint8_t *) s,
- &len);
+ &len);
assert(perror == PARSERUTILS_OK);
return len;
@@ -131,7 +131,7 @@ size_t utf8_next(const char *s, size_t l, size_t o)
parserutils_error perror;
perror = parserutils_charset_utf8_next((const uint8_t *) s, l, o,
- &next);
+ &next);
assert(perror == PARSERUTILS_OK);
return next;
@@ -151,6 +151,47 @@ static inline void utf8_clear_cd_cache(void)
last_cd.cd = 0;
}
+/**
+ * obtain a cached conversion descriptor
+ *
+ * either return the cached conversion descriptor or create one if required
+ */
+static nserror
+get_cached_cd(const char *enc_from, const char *enc_to, iconv_t *cd_out)
+{
+ iconv_t cd;
+ /* we cache the last used conversion descriptor,
+ * so check if we're trying to use it here */
+ if (strncasecmp(last_cd.from, enc_from, sizeof(last_cd.from)) == 0 &&
+ strncasecmp(last_cd.to, enc_to, sizeof(last_cd.to)) == 0 &&
+ last_cd.cd != 0) {
+ *cd_out = last_cd.cd;
+ return NSERROR_OK;
+ }
+
+ /* no match, so create a new cd */
+ cd = iconv_open(enc_to, enc_from);
+ if (cd == (iconv_t) -1) {
+ if (errno == EINVAL) {
+ return NSERROR_BAD_ENCODING;
+ }
+ /* default to no memory */
+ return NSERROR_NOMEM;
+ }
+
+ /* close the last cd - we don't care if this fails */
+ if (last_cd.cd) {
+ iconv_close(last_cd.cd);
+ }
+
+ /* and safely copy the to/from/cd data into last_cd */
+ snprintf(last_cd.from, sizeof(last_cd.from), enc_from);
+ snprintf(last_cd.to, sizeof(last_cd.to), "%s", enc_to);
+ *cd_out = last_cd.cd = cd;
+
+ return NSERROR_OK;
+}
+
/* exported interface documented in utils/utf8.h */
nserror utf8_finalise(void)
{
@@ -168,95 +209,72 @@ nserror utf8_finalise(void)
* Convert a string from one encoding to another
*
* \param string The NULL-terminated string to convert
- * \param len Length of input string to consider (in bytes), or 0
+ * \param slen Length of input string to consider (in bytes), or 0
* \param from The encoding name to convert from
* \param to The encoding name to convert to
- * \param result Pointer to location in which to store result.
- * \param result_len Pointer to location in which to store result length.
+ * \param result_out Pointer to location in which to store result.
+ * \param result_len_out Pointer to location in which to store result length.
* \return NSERROR_OK for no error, NSERROR_NOMEM on allocation error,
* NSERROR_BAD_ENCODING for a bad character encoding
*/
static nserror
utf8_convert(const char *string,
- size_t len,
+ size_t slen,
const char *from,
const char *to,
- char **result,
- size_t *result_len)
+ char **result_out,
+ size_t *result_len_out)
{
iconv_t cd;
- char *temp, *out, *in;
- size_t slen, rlen;
-
- assert(string && from && to && result);
-
- if (string[0] == '\0') {
- /* On AmigaOS, iconv() returns an error if we pass an
- * empty string. This prevents iconv() being called as
- * there is no conversion necessary anyway. */
- *result = strdup("");
- if (!(*result)) {
- *result = NULL;
- return NSERROR_NOMEM;
- }
+ char *temp, *out, *in, *result;
+ size_t result_len;
+ nserror res;
- return NSERROR_OK;
+ assert(string && from && to && result_out);
+
+ /* calculate the source length if not given */
+ if (slen==0) {
+ slen = strlen(string);
}
- if (strcasecmp(from, to) == 0) {
- /* conversion from an encoding to itself == strdup */
- slen = len ? len : strlen(string);
- *(result) = strndup(string, slen);
- if (!(*result)) {
- *(result) = NULL;
+ /* process the empty string separately avoiding any conversion
+ * check for the source and destination encoding being the same
+ *
+ * This optimisation is necessary on AmigaOS as iconv()
+ * returns an error if an empty string is passed.
+ */
+ if ((slen == 0) || (strcasecmp(from, to) == 0)) {
+ *result_out = strndup(string, slen);
+ if (*result_out == NULL) {
return NSERROR_NOMEM;
}
+ if (result_len_out != NULL) {
+ *result_len_out = slen;
+ }
return NSERROR_OK;
}
in = (char *)string;
- /* we cache the last used conversion descriptor,
- * so check if we're trying to use it here */
- if (strncasecmp(last_cd.from, from, sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) {
- cd = last_cd.cd;
- }
- else {
- /* no match, so create a new cd */
- cd = iconv_open(to, from);
- if (cd == (iconv_t)-1) {
- if (errno == EINVAL)
- return NSERROR_BAD_ENCODING;
- /* default to no memory */
- return NSERROR_NOMEM;
- }
-
- /* close the last cd - we don't care if this fails */
- if (last_cd.cd)
- iconv_close(last_cd.cd);
-
- /* and copy the to/from/cd data into last_cd */
- snprintf(last_cd.from, sizeof(last_cd.from), "%s", from);
- snprintf(last_cd.to, sizeof(last_cd.to), "%s", to);
- last_cd.cd = cd;
+ res = get_cached_cd(from, to, &cd);
+ if (res != NSERROR_OK) {
+ return res;
}
- slen = len ? len : strlen(string);
/* Worst case = ASCII -> UCS4, so allocate an output buffer
* 4 times larger than the input buffer, and add 4 bytes at
* the end for the NULL terminator
*/
- rlen = slen * 4 + 4;
+ result_len = slen * 4 + 4;
- temp = out = malloc(rlen);
+ temp = out = malloc(result_len);
if (!out) {
return NSERROR_NOMEM;
}
/* perform conversion */
- if (iconv(cd, (void *) &in, &slen, &out, &rlen) == (size_t)-1) {
+ if (iconv(cd, (void *) &in, &slen, &out, &result_len) == (size_t)-1) {
free(temp);
/* clear the cached conversion descriptor as it's invalid */
if (last_cd.cd)
@@ -270,19 +288,22 @@ utf8_convert(const char *string,
return NSERROR_NOMEM;
}
- *(result) = realloc(temp, out - temp + 4);
- if (!(*result)) {
+ result_len = out - temp;
+
+ /* resize buffer allowing for null termination */
+ result = realloc(temp, result_len + 4);
+ if (result == NULL) {
free(temp);
- *(result) = NULL; /* for sanity's sake */
return NSERROR_NOMEM;
}
/* NULL terminate - needs 4 characters as we may have
* converted to UTF-32 */
- memset((*result) + (out - temp), 0, 4);
+ memset(result + result_len, 0, 4);
- if (result_len != NULL) {
- *result_len = (out - temp);
+ *result_out = result;
+ if (result_len_out != NULL) {
+ *result_len_out = result_len;
}
return NSERROR_OK;
@@ -290,14 +311,14 @@ utf8_convert(const char *string,
/* exported interface documented in utils/utf8.h */
nserror utf8_to_enc(const char *string, const char *encname,
- size_t len, char **result)
+ size_t len, char **result)
{
return utf8_convert(string, len, "UTF-8", encname, result, NULL);
}
/* exported interface documented in utils/utf8.h */
nserror utf8_from_enc(const char *string, const char *encname,
- size_t len, char **result, size_t *result_len)
+ size_t len, char **result, size_t *result_len)
{
return utf8_convert(string, len, encname, "UTF-8", result, result_len);
}
@@ -328,7 +349,7 @@ utf8_convert_html_chunk(iconv_t cd,
esclen = snprintf(escape, sizeof(escape), "&#x%06x;", ucs4);
pescape = escape;
ret = iconv(cd, (void *) &pescape, &esclen,
- (void *) out, outlen);
+ (void *) out, outlen);
if (ret == (size_t) -1)
return NSERROR_NOMEM;
@@ -340,45 +361,26 @@ utf8_convert_html_chunk(iconv_t cd,
return NSERROR_OK;
}
+
+
/* exported interface documented in utils/utf8.h */
nserror
-utf8_to_html(const char *string, const char *encname, size_t len, char **result)
+utf8_to_html(const char *string, const char *encname, size_t len, char **result_out)
{
iconv_t cd;
const char *in;
- char *out, *origout;
+ char *out, *origout, *result;
size_t off, prev_off, inlen, outlen, origoutlen, esclen;
nserror ret;
char *pescape, escape[11];
+ nserror res;
if (len == 0)
len = strlen(string);
- /* we cache the last used conversion descriptor,
- * so check if we're trying to use it here */
- if (strncasecmp(last_cd.from, "UTF-8", sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, encname,
- sizeof(last_cd.to)) == 0 &&
- last_cd.cd != 0) {
- cd = last_cd.cd;
- } else {
- /* no match, so create a new cd */
- cd = iconv_open(encname, "UTF-8");
- if (cd == (iconv_t) -1) {
- if (errno == EINVAL)
- return NSERROR_BAD_ENCODING;
- /* default to no memory */
- return NSERROR_NOMEM;
- }
-
- /* close the last cd - we don't care if this fails */
- if (last_cd.cd)
- iconv_close(last_cd.cd);
-
- /* and safely copy the to/from/cd data into last_cd */
- snprintf(last_cd.from, sizeof(last_cd.from), "UTF-8");
- snprintf(last_cd.to, sizeof(last_cd.to), "%s", encname);
- last_cd.cd = cd;
+ res = get_cached_cd("UTF-8", encname, &cd);
+ if (res != NSERROR_OK) {
+ return res;
}
/* Worst case is ASCII -> UCS4, with all characters escaped:
@@ -398,13 +400,13 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
while (off < len) {
/* Must escape '&', '<', and '>' */
if (string[off] == '&' || string[off] == '<' ||
- string[off] == '>') {
+ string[off] == '>') {
if (off - prev_off > 0) {
/* Emit chunk */
in = string + prev_off;
inlen = off - prev_off;
ret = utf8_convert_html_chunk(cd, in, inlen,
- &out, &outlen);
+ &out, &outlen);
if (ret != NSERROR_OK) {
free(origout);
iconv_close(cd);
@@ -415,10 +417,10 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
/* Emit mandatory escape */
esclen = snprintf(escape, sizeof(escape),
- "&#x%06x;", string[off]);
+ "&#x%06x;", string[off]);
pescape = escape;
ret = utf8_convert_html_chunk(cd, pescape, esclen,
- &out, &outlen);
+ &out, &outlen);
if (ret != NSERROR_OK) {
free(origout);
iconv_close(cd);
@@ -450,11 +452,12 @@ utf8_to_html(const char *string, const char *encname, size_t len,
char **result)
outlen -= 4;
/* Shrink-wrap */
- *result = realloc(origout, origoutlen - outlen);
- if (*result == NULL) {
+ result = realloc(origout, origoutlen - outlen);
+ if (result == NULL) {
free(origout);
return NSERROR_NOMEM;
}
+ *result_out = result;
return NSERROR_OK;
}
--
NetSurf Browser