netsurf: branch chris/idna2008 updated. release/3.0-1171-g9e8d2f0
by NetSurf Browser Project
Gitweb links:
...log http://git.netsurf-browser.org/netsurf.git/shortlog/9e8d2f028fc511d0471e7...
...commit http://git.netsurf-browser.org/netsurf.git/commit/9e8d2f028fc511d0471e770...
...tree http://git.netsurf-browser.org/netsurf.git/tree/9e8d2f028fc511d0471e77060...
The branch, chris/idna2008 has been updated
via 9e8d2f028fc511d0471e7706095e6b8da620fc18 (commit)
from 68bfa6b5b24a4e7cd0cdc394e684d383f957597e (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commitdiff http://git.netsurf-browser.org/netsurf.git/commit/?id=9e8d2f028fc511d0471...
commit 9e8d2f028fc511d0471e7706095e6b8da620fc18
Author: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Commit: Chris Young <chris(a)unsatisfactorysoftware.co.uk>
Use correct and consistent terminology, remove debug logging
diff --git a/utils/idna.c b/utils/idna.c
index d93fb1e..fcec47d 100644
--- a/utils/idna.c
+++ b/utils/idna.c
@@ -112,7 +112,7 @@ static bool idna__contexto_rule(int32_t cp)
* \param index character in the string which is CONTEXTJ
* \return true if conforming
*/
-static bool idna__contextj_rule(int32_t *string, int index, size_t len)
+static bool idna__contextj_rule(int32_t *label, int index, size_t len)
{
const utf8proc_property_t *unicode_props;
idna_unicode_jt joining_type;
@@ -123,18 +123,18 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
* http://www.iana.org/assignments/idna-tables-5.2.0/idna-tables-5.2.0.xml
*/
- if (string[index] == 0x200c) {
+ if (label[index] == 0x200c) {
if (index == 0) return false; /* No previous character */
- unicode_props = utf8proc_get_property(string[index - 1]);
+ unicode_props = utf8proc_get_property(label[index - 1]);
if (unicode_props->combining_class == UTF8PROC_CCC_VIRAMA)
return true;
match = false;
for (i = 0; i < (index - 1); i++) {
- joining_type = idna__jt_property(string[i]);
+ joining_type = idna__jt_property(label[i]);
if (((joining_type == IDNA_UNICODE_JT_L) ||
(joining_type == IDNA_UNICODE_JT_D)) &&
- (idna__jt_property(string[i+1]) == IDNA_UNICODE_JT_T)) {
+ (idna__jt_property(label[i+1]) == IDNA_UNICODE_JT_T)) {
match = true;
break;
}
@@ -142,11 +142,11 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
if (match == false) return false;
- if (idna__jt_property(string[index+1]) != IDNA_UNICODE_JT_T)
+ if (idna__jt_property(label[index+1]) != IDNA_UNICODE_JT_T)
return false;
for (i = (index + 1); i < (int)len; i++) {
- joining_type = idna__jt_property(string[i]);
+ joining_type = idna__jt_property(label[i]);
if ((joining_type == IDNA_UNICODE_JT_R) ||
(joining_type == IDNA_UNICODE_JT_D)) {
return true;
@@ -156,9 +156,9 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
return false;
- } else if (string[index] == 0x200d) {
+ } else if (label[index] == 0x200d) {
if (index == 0) return false; /* No previous character */
- unicode_props = utf8proc_get_property(string[index - 1]);
+ unicode_props = utf8proc_get_property(label[index - 1]);
if (unicode_props->combining_class == UTF8PROC_CCC_VIRAMA)
return true;
return false;
@@ -172,31 +172,31 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
/**
* Convert a UTF-8 string to UCS-4
*
- * \param utf8_host UTF-8 string containing host
- * \param len Length of host string (in bytes)
- * \param ucs4_host Pointer to update with the output
+ * \param utf8_label UTF-8 string containing host label
+ * \param len Length of host label (in bytes)
+ * \param ucs4_label Pointer to update with the output
* \param ucs4_len Pointer to update with the length
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__utf8_to_ucs4(const char *utf8_host, size_t len, int32_t **ucs4_host, size_t *ucs4_len)
+static nserror idna__utf8_to_ucs4(const char *utf8_label, size_t len, int32_t **ucs4_label, size_t *ucs4_len)
{
- int32_t *nfc_host;
+ int32_t *nfc_label;
ssize_t nfc_size;
- nfc_host = malloc(len * 4);
- if (nfc_host == NULL) return NSERROR_NOMEM;
+ nfc_label = malloc(len * 4);
+ if (nfc_label == NULL) return NSERROR_NOMEM;
- nfc_size = utf8proc_decompose((const uint8_t *)utf8_host, len,
- nfc_host, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
+ nfc_size = utf8proc_decompose((const uint8_t *)utf8_label, len,
+ nfc_label, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(nfc_size < 0) return NSERROR_NOMEM;
- nfc_size = utf8proc_normalise(nfc_host, nfc_size,
+ nfc_size = utf8proc_normalise(nfc_label, nfc_size,
UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(nfc_size < 0) return NSERROR_NOMEM;
- *ucs4_host = nfc_host;
+ *ucs4_label = nfc_label;
*ucs4_len = nfc_size;
return NSERROR_OK;
}
@@ -205,28 +205,28 @@ static nserror idna__utf8_to_ucs4(const char *utf8_host, size_t len, int32_t **u
/**
* Convert a UCS-4 string to UTF-8
*
- * \param ucs4_host UCS-4 string containing host
- * \param ucs4_len Length of host string (in bytes)
- * \param utf8_host Pointer to update with the output
+ * \param ucs4_label UCS-4 string containing host label
+ * \param ucs4_len Length of host label (in bytes)
+ * \param utf8_label Pointer to update with the output
* \param utf8_len Pointer to update with the length
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__ucs4_to_utf8(const int32_t *ucs4_host, size_t ucs4_len, char **utf8_host, size_t *utf8_len)
+static nserror idna__ucs4_to_utf8(const int32_t *ucs4_label, size_t ucs4_len, char **utf8_label, size_t *utf8_len)
{
- int32_t *nfc_host;
+ int32_t *nfc_label;
ssize_t nfc_size = ucs4_len;
- nfc_host = malloc(1 + ucs4_len * 4);
- if (nfc_host == NULL) return NSERROR_NOMEM;
- memcpy(nfc_host, ucs4_host, ucs4_len * 4);
+ nfc_label = malloc(1 + ucs4_len * 4);
+ if (nfc_label == NULL) return NSERROR_NOMEM;
+ memcpy(nfc_label, ucs4_label, ucs4_len * 4);
- nfc_size = utf8proc_reencode(nfc_host, ucs4_len,
+ nfc_size = utf8proc_reencode(nfc_label, ucs4_len,
UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(nfc_size < 0) return NSERROR_NOMEM;
- *utf8_host = (char *)nfc_host;
+ *utf8_label = (char *)nfc_label;
*utf8_len = nfc_size;
return NSERROR_OK;
@@ -236,17 +236,17 @@ static nserror idna__ucs4_to_utf8(const int32_t *ucs4_host, size_t ucs4_len, cha
/**
* Convert a host label in UCS-4 to an ACE version
*
- * \param ucs4_host UCS-4 NFC string containing host
- * \param len Length of host string (in characters/codepoints)
- * \param ace_host ASCII-compatible encoded version
- * \param out_len Length of ace_host
+ * \param ucs4_label UCS-4 NFC string containing host label
+ * \param len Length of host label (in characters/codepoints)
+ * \param ace_label ASCII-compatible encoded version
+ * \param out_len Length of ace_label
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host, size_t *out_len)
+static nserror idna__ucs4_to_ace(int32_t *ucs4_label, size_t len, char **ace_label, size_t *out_len)
{
- char punycode[65]; /* max length of host part + NULL */
+ char punycode[65]; /* max length of host label + NULL */
enum punycode_status status;
size_t output_length = 60; /* punycode length - 4 - 1 */
@@ -255,7 +255,7 @@ static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host
punycode[2] = '-';
punycode[3] = '-';
- status = punycode_encode(len, (const punycode_uint *)ucs4_host,
+ status = punycode_encode(len, (const punycode_uint *)ucs4_label,
NULL, &output_length, punycode + 4);
if (status == punycode_bad_input) {
LOG(("Bad input"));
@@ -271,7 +271,7 @@ static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host
output_length += SLEN("xn--");
punycode[output_length] = '\0';
- *ace_host = strdup(punycode);
+ *ace_label = strdup(punycode);
*out_len = output_length;
return NSERROR_OK;
@@ -281,28 +281,28 @@ static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host
/**
* Convert a host label in ACE format to UCS-4
*
- * \param ace_host ASCII string containing host
- * \param ace_len Length of host string
- * \param ucs4_host Pointer to hold UCS4 decoded version
- * \param ucs4_len Pointer to hold length of ucs4_host
+ * \param ace_label ASCII string containing host label
+ * \param ace_len Length of host label
+ * \param ucs4_label Pointer to hold UCS4 decoded version
+ * \param ucs4_len Pointer to hold length of ucs4_label
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__ace_to_ucs4(const char *ace_host, size_t ace_len, int32_t **ucs4_host, size_t *ucs4_len)
+static nserror idna__ace_to_ucs4(const char *ace_label, size_t ace_len, int32_t **ucs4_label, size_t *ucs4_len)
{
int32_t *ucs4;
enum punycode_status status;
size_t output_length = ace_len; /* never exceeds input length */
/* The header should always have been checked before calling */
- assert((ace_host[0] == 'x') && (ace_host[1] == 'n') &&
- (ace_host[2] == '-') && (ace_host[3] == '-'));
+ assert((ace_label[0] == 'x') && (ace_label[1] == 'n') &&
+ (ace_label[2] == '-') && (ace_label[3] == '-'));
ucs4 = malloc(output_length * 4);
if (ucs4 == NULL) return NSERROR_NOMEM;
- status = punycode_decode(ace_len - 4, ace_host + 4,
+ status = punycode_decode(ace_len - 4, ace_label + 4,
&output_length, (punycode_uint *)ucs4, NULL);
if (status == punycode_bad_input) {
@@ -318,7 +318,7 @@ static nserror idna__ace_to_ucs4(const char *ace_host, size_t ace_len, int32_t *
ucs4[output_length] = '\0';
- *ucs4_host = ucs4;
+ *ucs4_label = ucs4;
*ucs4_len = output_length;
return NSERROR_OK;
@@ -332,7 +332,7 @@ static nserror idna__ace_to_ucs4(const char *ace_host, size_t ace_len, int32_t *
* \param max_length Length of host string to search (in bytes)
* \return Distance to next separator character or end of string
*/
-static size_t idna__host_part_length(const char *host, size_t max_length)
+static size_t idna__host_label_length(const char *host, size_t max_length)
{
const char *p = host;
size_t length = 0;
@@ -350,11 +350,11 @@ static size_t idna__host_part_length(const char *host, size_t max_length)
/**
* Check if a host label is valid for IDNA2008
*
- * \param host Host label to check (UCS-4)
- * \param len Length of host string (in characters/codepoints)
+ * \param label Host label to check (UCS-4)
+ * \param len Length of host label (in characters/codepoints)
* \return true if compliant, false otherwise
*/
-static bool idna__is_valid(int32_t *host, size_t len)
+static bool idna__is_valid(int32_t *label, size_t len)
{
const utf8proc_property_t *unicode_props;
idna_property idna_prop;
@@ -366,13 +366,13 @@ static bool idna__is_valid(int32_t *host, size_t len)
*/
/* 2. Check characters 3 and 4 are not '--'. */
- if ((host[2] == 0x002d) && (host[3] == 0x002d)) {
+ if ((label[2] == 0x002d) && (label[3] == 0x002d)) {
LOG(("Check failed: characters 2 and 3 are '--'"));
return false;
}
/* 3. Check the first character is not a combining mark */
- unicode_props = utf8proc_get_property(host[0]);
+ unicode_props = utf8proc_get_property(label[0]);
if ((unicode_props->category == UTF8PROC_CATEGORY_MN) ||
(unicode_props->category == UTF8PROC_CATEGORY_MC) ||
@@ -382,18 +382,18 @@ static bool idna__is_valid(int32_t *host, size_t len)
}
for (i = 0; i < len; i++) {
- idna_prop = idna__cp_property(host[i]);
+ idna_prop = idna__cp_property(label[i]);
/* 4. Check characters not DISALLOWED by RFC5892 */
if (idna_prop == IDNA_P_DISALLOWED) {
- LOG(("Check failed: character %d (%x) is DISALLOWED", i, host[i]));
+ LOG(("Check failed: character %d (%x) is DISALLOWED", i, label[i]));
return false;
}
/* 5. Check CONTEXTJ characters conform to defined rules */
if (idna_prop == IDNA_P_CONTEXTJ) {
- if (idna__contextj_rule(host, i, len) == false) {
- LOG(("Check failed: character %d (%x) does not conform to CONTEXTJ rule", i, host[i]));
+ if (idna__contextj_rule(label, i, len) == false) {
+ LOG(("Check failed: character %d (%x) does not conform to CONTEXTJ rule", i, label[i]));
return false;
}
}
@@ -401,15 +401,15 @@ static bool idna__is_valid(int32_t *host, size_t len)
/* 6. Check CONTEXTO characters have a rule defined */
/*\todo optionally we can check conformance to this rule */
if (idna_prop == IDNA_P_CONTEXTO) {
- if (idna__contexto_rule(host[i]) == false) {
- LOG(("Check failed: character %d (%x) has no CONTEXTO rule defined", i, host[i]));
+ if (idna__contexto_rule(label[i]) == false) {
+ LOG(("Check failed: character %d (%x) has no CONTEXTO rule defined", i, label[i]));
return false;
}
}
/* 7. Check characters are not UNASSIGNED */
if (idna_prop == IDNA_P_UNASSIGNED) {
- LOG(("Check failed: character %d (%x) is UNASSIGNED", i, host[i]));
+ LOG(("Check failed: character %d (%x) is UNASSIGNED", i, label[i]));
return false;
}
@@ -423,13 +423,13 @@ static bool idna__is_valid(int32_t *host, size_t len)
/**
* Check if a host label is LDH
*
- * \param host Host label to check
- * \param len Length of host string
+ * \param label Host label to check
+ * \param len Length of host label
* \return true if LDH compliant, false otherwise
*/
-static bool idna__is_ldh(const char *host, size_t len)
+static bool idna__is_ldh(const char *label, size_t len)
{
- const char *p = host;
+ const char *p = label;
size_t i = 0;
/* Check for leading or trailing hyphens */
@@ -454,19 +454,19 @@ static bool idna__is_ldh(const char *host, size_t len)
/**
* Check if a host label appears to be ACE
*
- * \param host Host label to check
- * \param len Length of host string
+ * \param label Host label to check
+ * \param len Length of host label
* \return true if ACE compliant, false otherwise
*/
-static bool idna__is_ace(const char *host, size_t len)
+static bool idna__is_ace(const char *label, size_t len)
{
/* Check it is a valid DNS string */
- if (idna__is_ldh(host, len) == false)
+ if (idna__is_ldh(label, len) == false)
return false;
/* Check the ACE prefix is present */
- if ((host[0] == 'x') && (host[1] == 'n') &&
- (host[2] == '-') && (host[3] == '-'))
+ if ((label[0] == 'x') && (label[1] == 'n') &&
+ (label[2] == '-') && (label[3] == '-'))
return true;
return false;
@@ -513,16 +513,16 @@ nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_l
{
nserror error;
int32_t *ucs4_host;
- size_t part_len, output_len, ucs4_len, fqdn_len = 0;
+ size_t label_len, output_len, ucs4_len, fqdn_len = 0;
char fqdn[256];
char *output, *fqdn_p = fqdn;
- while ((part_len = idna__host_part_length(host, len)) != 0) {
- if (idna__is_ldh(host, part_len) == false) {
+ while ((label_len = idna__host_label_length(host, len)) != 0) {
+ if (idna__is_ldh(host, label_len) == false) {
/* This string is IDN or invalid */
/* Convert to Unicode */
- if ((error = idna__utf8_to_ucs4(host, part_len,
+ if ((error = idna__utf8_to_ucs4(host, label_len,
&ucs4_host, &ucs4_len)) != NSERROR_OK)
return error;
@@ -541,31 +541,30 @@ nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_l
fqdn_len += output_len;
} else {
/* This is already a DNS-valid ASCII string */
- if ((idna__is_ace(host, part_len) == true) &&
- (idna__verify(host, part_len) == false)) {
+ if ((idna__is_ace(host, label_len) == true) &&
+ (idna__verify(host, label_len) == false)) {
LOG(("Cannot verify ACE label %s", host));
return NSERROR_BAD_URL;
}
- strncpy(fqdn_p, host, part_len);
- fqdn_p += part_len;
- fqdn_len += part_len;
+ strncpy(fqdn_p, host, label_len);
+ fqdn_p += label_len;
+ fqdn_len += label_len;
}
*fqdn_p = '.';
fqdn_p++;
fqdn_len++;
- host += part_len;
+ host += label_len;
if ((*host == '\0') || (*host == ':')) break;
host++;
- len = len - part_len - 1;
+ len = len - label_len - 1;
}
fqdn_p--;
*fqdn_p = '\0';
*ace_host = strdup(fqdn);
*ace_len = fqdn_len - 1; /* last character is NULL */
- LOG(("Punycode FQDN >> %s", *ace_host));
return NSERROR_OK;
}
@@ -576,16 +575,16 @@ nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *h
{
nserror error;
int32_t *ucs4_host;
- size_t part_len, output_len, ucs4_len, fqdn_len = 0;
+ size_t label_len, output_len, ucs4_len, fqdn_len = 0;
char fqdn[256];
char *output, *fqdn_p = fqdn;
- while ((part_len = idna__host_part_length(ace_host, ace_len)) != 0) {
- if (idna__is_ace(ace_host, part_len) == true) {
+ while ((label_len = idna__host_label_length(ace_host, ace_len)) != 0) {
+ if (idna__is_ace(ace_host, label_len) == true) {
/* This string is DNS-valid and (probably) encoded */
/* Decode to Unicode */
- error = idna__ace_to_ucs4(ace_host, part_len,
+ error = idna__ace_to_ucs4(ace_host, label_len,
&ucs4_host, &ucs4_len);
if (error != NSERROR_OK) return error;
@@ -601,26 +600,26 @@ nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *h
fqdn_len += output_len;
} else {
/* Not ACE */
- memcpy(fqdn_p, ace_host, part_len);
- fqdn_p += part_len;
- fqdn_len += part_len;
+ memcpy(fqdn_p, ace_host, label_len);
+ fqdn_p += label_len;
+ fqdn_len += label_len;
}
*fqdn_p = '.';
fqdn_p++;
fqdn_len++;
- ace_host += part_len;
+ ace_host += label_len;
if ((*ace_host == '\0') || (*ace_host == ':')) break;
ace_host++;
- ace_len = ace_len - part_len - 1;
+ ace_len = ace_len - label_len - 1;
}
fqdn_p--;
*fqdn_p = '\0';
*host = strdup(fqdn);
*host_len = fqdn_len - 1; /* last character is NULL */
- LOG(("Decoded FQDN >> %s", *host));
+
return NSERROR_OK;
}
-----------------------------------------------------------------------
Summary of changes:
utils/idna.c | 183 +++++++++++++++++++++++++++++-----------------------------
1 file changed, 91 insertions(+), 92 deletions(-)
diff --git a/utils/idna.c b/utils/idna.c
index d93fb1e..fcec47d 100644
--- a/utils/idna.c
+++ b/utils/idna.c
@@ -112,7 +112,7 @@ static bool idna__contexto_rule(int32_t cp)
* \param index character in the string which is CONTEXTJ
* \return true if conforming
*/
-static bool idna__contextj_rule(int32_t *string, int index, size_t len)
+static bool idna__contextj_rule(int32_t *label, int index, size_t len)
{
const utf8proc_property_t *unicode_props;
idna_unicode_jt joining_type;
@@ -123,18 +123,18 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
* http://www.iana.org/assignments/idna-tables-5.2.0/idna-tables-5.2.0.xml
*/
- if (string[index] == 0x200c) {
+ if (label[index] == 0x200c) {
if (index == 0) return false; /* No previous character */
- unicode_props = utf8proc_get_property(string[index - 1]);
+ unicode_props = utf8proc_get_property(label[index - 1]);
if (unicode_props->combining_class == UTF8PROC_CCC_VIRAMA)
return true;
match = false;
for (i = 0; i < (index - 1); i++) {
- joining_type = idna__jt_property(string[i]);
+ joining_type = idna__jt_property(label[i]);
if (((joining_type == IDNA_UNICODE_JT_L) ||
(joining_type == IDNA_UNICODE_JT_D)) &&
- (idna__jt_property(string[i+1]) == IDNA_UNICODE_JT_T)) {
+ (idna__jt_property(label[i+1]) == IDNA_UNICODE_JT_T)) {
match = true;
break;
}
@@ -142,11 +142,11 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
if (match == false) return false;
- if (idna__jt_property(string[index+1]) != IDNA_UNICODE_JT_T)
+ if (idna__jt_property(label[index+1]) != IDNA_UNICODE_JT_T)
return false;
for (i = (index + 1); i < (int)len; i++) {
- joining_type = idna__jt_property(string[i]);
+ joining_type = idna__jt_property(label[i]);
if ((joining_type == IDNA_UNICODE_JT_R) ||
(joining_type == IDNA_UNICODE_JT_D)) {
return true;
@@ -156,9 +156,9 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
return false;
- } else if (string[index] == 0x200d) {
+ } else if (label[index] == 0x200d) {
if (index == 0) return false; /* No previous character */
- unicode_props = utf8proc_get_property(string[index - 1]);
+ unicode_props = utf8proc_get_property(label[index - 1]);
if (unicode_props->combining_class == UTF8PROC_CCC_VIRAMA)
return true;
return false;
@@ -172,31 +172,31 @@ static bool idna__contextj_rule(int32_t *string, int index, size_t len)
/**
* Convert a UTF-8 string to UCS-4
*
- * \param utf8_host UTF-8 string containing host
- * \param len Length of host string (in bytes)
- * \param ucs4_host Pointer to update with the output
+ * \param utf8_label UTF-8 string containing host label
+ * \param len Length of host label (in bytes)
+ * \param ucs4_label Pointer to update with the output
* \param ucs4_len Pointer to update with the length
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__utf8_to_ucs4(const char *utf8_host, size_t len, int32_t **ucs4_host, size_t *ucs4_len)
+static nserror idna__utf8_to_ucs4(const char *utf8_label, size_t len, int32_t **ucs4_label, size_t *ucs4_len)
{
- int32_t *nfc_host;
+ int32_t *nfc_label;
ssize_t nfc_size;
- nfc_host = malloc(len * 4);
- if (nfc_host == NULL) return NSERROR_NOMEM;
+ nfc_label = malloc(len * 4);
+ if (nfc_label == NULL) return NSERROR_NOMEM;
- nfc_size = utf8proc_decompose((const uint8_t *)utf8_host, len,
- nfc_host, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
+ nfc_size = utf8proc_decompose((const uint8_t *)utf8_label, len,
+ nfc_label, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(nfc_size < 0) return NSERROR_NOMEM;
- nfc_size = utf8proc_normalise(nfc_host, nfc_size,
+ nfc_size = utf8proc_normalise(nfc_label, nfc_size,
UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(nfc_size < 0) return NSERROR_NOMEM;
- *ucs4_host = nfc_host;
+ *ucs4_label = nfc_label;
*ucs4_len = nfc_size;
return NSERROR_OK;
}
@@ -205,28 +205,28 @@ static nserror idna__utf8_to_ucs4(const char *utf8_host, size_t len, int32_t **u
/**
* Convert a UCS-4 string to UTF-8
*
- * \param ucs4_host UCS-4 string containing host
- * \param ucs4_len Length of host string (in bytes)
- * \param utf8_host Pointer to update with the output
+ * \param ucs4_label UCS-4 string containing host label
+ * \param ucs4_len Length of host label (in bytes)
+ * \param utf8_label Pointer to update with the output
* \param utf8_len Pointer to update with the length
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__ucs4_to_utf8(const int32_t *ucs4_host, size_t ucs4_len, char **utf8_host, size_t *utf8_len)
+static nserror idna__ucs4_to_utf8(const int32_t *ucs4_label, size_t ucs4_len, char **utf8_label, size_t *utf8_len)
{
- int32_t *nfc_host;
+ int32_t *nfc_label;
ssize_t nfc_size = ucs4_len;
- nfc_host = malloc(1 + ucs4_len * 4);
- if (nfc_host == NULL) return NSERROR_NOMEM;
- memcpy(nfc_host, ucs4_host, ucs4_len * 4);
+ nfc_label = malloc(1 + ucs4_len * 4);
+ if (nfc_label == NULL) return NSERROR_NOMEM;
+ memcpy(nfc_label, ucs4_label, ucs4_len * 4);
- nfc_size = utf8proc_reencode(nfc_host, ucs4_len,
+ nfc_size = utf8proc_reencode(nfc_label, ucs4_len,
UTF8PROC_STABLE | UTF8PROC_COMPOSE);
if(nfc_size < 0) return NSERROR_NOMEM;
- *utf8_host = (char *)nfc_host;
+ *utf8_label = (char *)nfc_label;
*utf8_len = nfc_size;
return NSERROR_OK;
@@ -236,17 +236,17 @@ static nserror idna__ucs4_to_utf8(const int32_t *ucs4_host, size_t ucs4_len, cha
/**
* Convert a host label in UCS-4 to an ACE version
*
- * \param ucs4_host UCS-4 NFC string containing host
- * \param len Length of host string (in characters/codepoints)
- * \param ace_host ASCII-compatible encoded version
- * \param out_len Length of ace_host
+ * \param ucs4_label UCS-4 NFC string containing host label
+ * \param len Length of host label (in characters/codepoints)
+ * \param ace_label ASCII-compatible encoded version
+ * \param out_len Length of ace_label
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host, size_t *out_len)
+static nserror idna__ucs4_to_ace(int32_t *ucs4_label, size_t len, char **ace_label, size_t *out_len)
{
- char punycode[65]; /* max length of host part + NULL */
+ char punycode[65]; /* max length of host label + NULL */
enum punycode_status status;
size_t output_length = 60; /* punycode length - 4 - 1 */
@@ -255,7 +255,7 @@ static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host
punycode[2] = '-';
punycode[3] = '-';
- status = punycode_encode(len, (const punycode_uint *)ucs4_host,
+ status = punycode_encode(len, (const punycode_uint *)ucs4_label,
NULL, &output_length, punycode + 4);
if (status == punycode_bad_input) {
LOG(("Bad input"));
@@ -271,7 +271,7 @@ static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host
output_length += SLEN("xn--");
punycode[output_length] = '\0';
- *ace_host = strdup(punycode);
+ *ace_label = strdup(punycode);
*out_len = output_length;
return NSERROR_OK;
@@ -281,28 +281,28 @@ static nserror idna__ucs4_to_ace(int32_t *ucs4_host, size_t len, char **ace_host
/**
* Convert a host label in ACE format to UCS-4
*
- * \param ace_host ASCII string containing host
- * \param ace_len Length of host string
- * \param ucs4_host Pointer to hold UCS4 decoded version
- * \param ucs4_len Pointer to hold length of ucs4_host
+ * \param ace_label ASCII string containing host label
+ * \param ace_len Length of host label
+ * \param ucs4_label Pointer to hold UCS4 decoded version
+ * \param ucs4_len Pointer to hold length of ucs4_label
* \return NSERROR_OK on success, appropriate error otherwise
*
* If return value != NSERROR_OK, output will be left untouched.
*/
-static nserror idna__ace_to_ucs4(const char *ace_host, size_t ace_len, int32_t **ucs4_host, size_t *ucs4_len)
+static nserror idna__ace_to_ucs4(const char *ace_label, size_t ace_len, int32_t **ucs4_label, size_t *ucs4_len)
{
int32_t *ucs4;
enum punycode_status status;
size_t output_length = ace_len; /* never exceeds input length */
/* The header should always have been checked before calling */
- assert((ace_host[0] == 'x') && (ace_host[1] == 'n') &&
- (ace_host[2] == '-') && (ace_host[3] == '-'));
+ assert((ace_label[0] == 'x') && (ace_label[1] == 'n') &&
+ (ace_label[2] == '-') && (ace_label[3] == '-'));
ucs4 = malloc(output_length * 4);
if (ucs4 == NULL) return NSERROR_NOMEM;
- status = punycode_decode(ace_len - 4, ace_host + 4,
+ status = punycode_decode(ace_len - 4, ace_label + 4,
&output_length, (punycode_uint *)ucs4, NULL);
if (status == punycode_bad_input) {
@@ -318,7 +318,7 @@ static nserror idna__ace_to_ucs4(const char *ace_host, size_t ace_len, int32_t *
ucs4[output_length] = '\0';
- *ucs4_host = ucs4;
+ *ucs4_label = ucs4;
*ucs4_len = output_length;
return NSERROR_OK;
@@ -332,7 +332,7 @@ static nserror idna__ace_to_ucs4(const char *ace_host, size_t ace_len, int32_t *
* \param max_length Length of host string to search (in bytes)
* \return Distance to next separator character or end of string
*/
-static size_t idna__host_part_length(const char *host, size_t max_length)
+static size_t idna__host_label_length(const char *host, size_t max_length)
{
const char *p = host;
size_t length = 0;
@@ -350,11 +350,11 @@ static size_t idna__host_part_length(const char *host, size_t max_length)
/**
* Check if a host label is valid for IDNA2008
*
- * \param host Host label to check (UCS-4)
- * \param len Length of host string (in characters/codepoints)
+ * \param label Host label to check (UCS-4)
+ * \param len Length of host label (in characters/codepoints)
* \return true if compliant, false otherwise
*/
-static bool idna__is_valid(int32_t *host, size_t len)
+static bool idna__is_valid(int32_t *label, size_t len)
{
const utf8proc_property_t *unicode_props;
idna_property idna_prop;
@@ -366,13 +366,13 @@ static bool idna__is_valid(int32_t *host, size_t len)
*/
/* 2. Check characters 3 and 4 are not '--'. */
- if ((host[2] == 0x002d) && (host[3] == 0x002d)) {
+ if ((label[2] == 0x002d) && (label[3] == 0x002d)) {
LOG(("Check failed: characters 2 and 3 are '--'"));
return false;
}
/* 3. Check the first character is not a combining mark */
- unicode_props = utf8proc_get_property(host[0]);
+ unicode_props = utf8proc_get_property(label[0]);
if ((unicode_props->category == UTF8PROC_CATEGORY_MN) ||
(unicode_props->category == UTF8PROC_CATEGORY_MC) ||
@@ -382,18 +382,18 @@ static bool idna__is_valid(int32_t *host, size_t len)
}
for (i = 0; i < len; i++) {
- idna_prop = idna__cp_property(host[i]);
+ idna_prop = idna__cp_property(label[i]);
/* 4. Check characters not DISALLOWED by RFC5892 */
if (idna_prop == IDNA_P_DISALLOWED) {
- LOG(("Check failed: character %d (%x) is DISALLOWED", i, host[i]));
+ LOG(("Check failed: character %d (%x) is DISALLOWED", i, label[i]));
return false;
}
/* 5. Check CONTEXTJ characters conform to defined rules */
if (idna_prop == IDNA_P_CONTEXTJ) {
- if (idna__contextj_rule(host, i, len) == false) {
- LOG(("Check failed: character %d (%x) does not conform to CONTEXTJ rule", i, host[i]));
+ if (idna__contextj_rule(label, i, len) == false) {
+ LOG(("Check failed: character %d (%x) does not conform to CONTEXTJ rule", i, label[i]));
return false;
}
}
@@ -401,15 +401,15 @@ static bool idna__is_valid(int32_t *host, size_t len)
/* 6. Check CONTEXTO characters have a rule defined */
/*\todo optionally we can check conformance to this rule */
if (idna_prop == IDNA_P_CONTEXTO) {
- if (idna__contexto_rule(host[i]) == false) {
- LOG(("Check failed: character %d (%x) has no CONTEXTO rule defined", i, host[i]));
+ if (idna__contexto_rule(label[i]) == false) {
+ LOG(("Check failed: character %d (%x) has no CONTEXTO rule defined", i, label[i]));
return false;
}
}
/* 7. Check characters are not UNASSIGNED */
if (idna_prop == IDNA_P_UNASSIGNED) {
- LOG(("Check failed: character %d (%x) is UNASSIGNED", i, host[i]));
+ LOG(("Check failed: character %d (%x) is UNASSIGNED", i, label[i]));
return false;
}
@@ -423,13 +423,13 @@ static bool idna__is_valid(int32_t *host, size_t len)
/**
* Check if a host label is LDH
*
- * \param host Host label to check
- * \param len Length of host string
+ * \param label Host label to check
+ * \param len Length of host label
* \return true if LDH compliant, false otherwise
*/
-static bool idna__is_ldh(const char *host, size_t len)
+static bool idna__is_ldh(const char *label, size_t len)
{
- const char *p = host;
+ const char *p = label;
size_t i = 0;
/* Check for leading or trailing hyphens */
@@ -454,19 +454,19 @@ static bool idna__is_ldh(const char *host, size_t len)
/**
* Check if a host label appears to be ACE
*
- * \param host Host label to check
- * \param len Length of host string
+ * \param label Host label to check
+ * \param len Length of host label
* \return true if ACE compliant, false otherwise
*/
-static bool idna__is_ace(const char *host, size_t len)
+static bool idna__is_ace(const char *label, size_t len)
{
/* Check it is a valid DNS string */
- if (idna__is_ldh(host, len) == false)
+ if (idna__is_ldh(label, len) == false)
return false;
/* Check the ACE prefix is present */
- if ((host[0] == 'x') && (host[1] == 'n') &&
- (host[2] == '-') && (host[3] == '-'))
+ if ((label[0] == 'x') && (label[1] == 'n') &&
+ (label[2] == '-') && (label[3] == '-'))
return true;
return false;
@@ -513,16 +513,16 @@ nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_l
{
nserror error;
int32_t *ucs4_host;
- size_t part_len, output_len, ucs4_len, fqdn_len = 0;
+ size_t label_len, output_len, ucs4_len, fqdn_len = 0;
char fqdn[256];
char *output, *fqdn_p = fqdn;
- while ((part_len = idna__host_part_length(host, len)) != 0) {
- if (idna__is_ldh(host, part_len) == false) {
+ while ((label_len = idna__host_label_length(host, len)) != 0) {
+ if (idna__is_ldh(host, label_len) == false) {
/* This string is IDN or invalid */
/* Convert to Unicode */
- if ((error = idna__utf8_to_ucs4(host, part_len,
+ if ((error = idna__utf8_to_ucs4(host, label_len,
&ucs4_host, &ucs4_len)) != NSERROR_OK)
return error;
@@ -541,31 +541,30 @@ nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_l
fqdn_len += output_len;
} else {
/* This is already a DNS-valid ASCII string */
- if ((idna__is_ace(host, part_len) == true) &&
- (idna__verify(host, part_len) == false)) {
+ if ((idna__is_ace(host, label_len) == true) &&
+ (idna__verify(host, label_len) == false)) {
LOG(("Cannot verify ACE label %s", host));
return NSERROR_BAD_URL;
}
- strncpy(fqdn_p, host, part_len);
- fqdn_p += part_len;
- fqdn_len += part_len;
+ strncpy(fqdn_p, host, label_len);
+ fqdn_p += label_len;
+ fqdn_len += label_len;
}
*fqdn_p = '.';
fqdn_p++;
fqdn_len++;
- host += part_len;
+ host += label_len;
if ((*host == '\0') || (*host == ':')) break;
host++;
- len = len - part_len - 1;
+ len = len - label_len - 1;
}
fqdn_p--;
*fqdn_p = '\0';
*ace_host = strdup(fqdn);
*ace_len = fqdn_len - 1; /* last character is NULL */
- LOG(("Punycode FQDN >> %s", *ace_host));
return NSERROR_OK;
}
@@ -576,16 +575,16 @@ nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *h
{
nserror error;
int32_t *ucs4_host;
- size_t part_len, output_len, ucs4_len, fqdn_len = 0;
+ size_t label_len, output_len, ucs4_len, fqdn_len = 0;
char fqdn[256];
char *output, *fqdn_p = fqdn;
- while ((part_len = idna__host_part_length(ace_host, ace_len)) != 0) {
- if (idna__is_ace(ace_host, part_len) == true) {
+ while ((label_len = idna__host_label_length(ace_host, ace_len)) != 0) {
+ if (idna__is_ace(ace_host, label_len) == true) {
/* This string is DNS-valid and (probably) encoded */
/* Decode to Unicode */
- error = idna__ace_to_ucs4(ace_host, part_len,
+ error = idna__ace_to_ucs4(ace_host, label_len,
&ucs4_host, &ucs4_len);
if (error != NSERROR_OK) return error;
@@ -601,26 +600,26 @@ nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *h
fqdn_len += output_len;
} else {
/* Not ACE */
- memcpy(fqdn_p, ace_host, part_len);
- fqdn_p += part_len;
- fqdn_len += part_len;
+ memcpy(fqdn_p, ace_host, label_len);
+ fqdn_p += label_len;
+ fqdn_len += label_len;
}
*fqdn_p = '.';
fqdn_p++;
fqdn_len++;
- ace_host += part_len;
+ ace_host += label_len;
if ((*ace_host == '\0') || (*ace_host == ':')) break;
ace_host++;
- ace_len = ace_len - part_len - 1;
+ ace_len = ace_len - label_len - 1;
}
fqdn_p--;
*fqdn_p = '\0';
*host = strdup(fqdn);
*host_len = fqdn_len - 1; /* last character is NULL */
- LOG(("Decoded FQDN >> %s", *host));
+
return NSERROR_OK;
}
--
NetSurf Browser