libidn/idna.c

   1 /* idna.c       Convert to or from IDN strings.
   2  * Copyright (C) 2002, 2003, 2004  Simon Josefsson
   3  *
   4  * This file is part of GNU Libidn.
   5  *
   6  * GNU Libidn is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * GNU Libidn is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with GNU Libidn; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  */
  21
  22 #if HAVE_CONFIG_H
  23 # include "config.h"
  24 #endif
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <stringprep.h>
  29 #include <punycode.h>
  30
  31 #include "idna.h"
  32
  33 #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||      \
  34                  (c) == 0xFF0E || (c) == 0xFF61)
  35
  36 /* Core functions */
  37
  38 /**
  39  * idna_to_ascii_4i
  40  * @in: input array with unicode code points.
  41  * @inlen: length of input array with unicode code points.
  42  * @out: output zero terminated string that must have room for at
  43  *       least 63 characters plus the terminating zero.
  44  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
  45  *
  46  * The ToASCII operation takes a sequence of Unicode code points that make
  47  * up one label and transforms it into a sequence of code points in the
  48  * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
  49  * resulting sequence are equivalent labels.
  50  *
  51  * It is important to note that the ToASCII operation can fail. ToASCII
  52  * fails if any step of it fails. If any step of the ToASCII operation
  53  * fails on any label in a domain name, that domain name MUST NOT be used
  54  * as an internationalized domain name. The method for deadling with this
  55  * failure is application-specific.
  56  *
  57  * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
  58  * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
  59  * sequence of ASCII code points or a failure condition.
  60  *
  61  * ToASCII never alters a sequence of code points that are all in the ASCII
  62  * range to begin with (although it could fail). Applying the ToASCII
  63  * operation multiple times has exactly the same effect as applying it just
  64  * once.
  65  *
  66  * Return value: Returns 0 on success, or an error code.
  67  */
  68 int
  69 idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
  70 {
  71   size_t len, outlen;
  72   uint32_t *src;                /* XXX don't need to copy data? */
  73   int rc;
  74
  75   /*
  76    * ToASCII consists of the following steps:
  77    *
  78    * 1. If all code points in the sequence are in the ASCII range (0..7F)
  79    * then skip to step 3.
  80    */
  81
  82   {
  83     size_t i;
  84     int inasciirange;
  85
  86     inasciirange = 1;
  87     for (i = 0; i < inlen; i++)
  88       if (in[i] > 0x7F)
  89         inasciirange = 0;
  90     if (inasciirange)
  91       {
  92         src = malloc (sizeof (in[0]) * (inlen + 1));
  93         if (src == NULL)
  94           return IDNA_MALLOC_ERROR;
  95
  96         memcpy (src, in, sizeof (in[0]) * inlen);
  97         src[inlen] = 0;
  98
  99         goto step3;
 100       }
 101   }
 102
 103   /*
 104    * 2. Perform the steps specified in [NAMEPREP] and fail if there is
 105    * an error. The AllowUnassigned flag is used in [NAMEPREP].
 106    */
 107
 108   {
 109     char *p;
 110
 111     p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
 112     if (p == NULL)
 113       return IDNA_MALLOC_ERROR;
 114
 115     len = strlen (p);
 116     do
 117       {
 118         char *newp;
 119
 120         len = 2 * len + 10;     /* XXX better guess? */
 121         newp = realloc (p, len);
 122         if (newp == NULL)
 123           {
 124             free (p);
 125             return IDNA_MALLOC_ERROR;
 126           }
 127         p = newp;
 128
 129         if (flags & IDNA_ALLOW_UNASSIGNED)
 130           rc = stringprep_nameprep (p, len);
 131         else
 132           rc = stringprep_nameprep_no_unassigned (p, len);
 133       }
 134     while (rc == STRINGPREP_TOO_SMALL_BUFFER);
 135
 136     if (rc != STRINGPREP_OK)
 137       {
 138         free (p);
 139         return IDNA_STRINGPREP_ERROR;
 140       }
 141
 142     src = stringprep_utf8_to_ucs4 (p, -1, NULL);
 143
 144     free (p);
 145   }
 146
 147 step3:
 148   /*
 149    * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
 150    *
 151    * (a) Verify the absence of non-LDH ASCII code points; that is,
 152    * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
 153    *
 154    * (b) Verify the absence of leading and trailing hyphen-minus;
 155    * that is, the absence of U+002D at the beginning and end of
 156    * the sequence.
 157    */
 158
 159   if (flags & IDNA_USE_STD3_ASCII_RULES)
 160     {
 161       size_t i;
 162
 163       for (i = 0; src[i]; i++)
 164         if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
 165             (src[i] >= 0x3A && src[i] <= 0x40) ||
 166             (src[i] >= 0x5B && src[i] <= 0x60) ||
 167             (src[i] >= 0x7B && src[i] <= 0x7F))
 168           {
 169             free (src);
 170             return IDNA_CONTAINS_NON_LDH;
 171           }
 172
 173       if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
 174         {
 175           free (src);
 176           return IDNA_CONTAINS_MINUS;
 177         }
 178     }
 179
 180   /*
 181    * 4. If all code points in the sequence are in the ASCII range
 182    * (0..7F), then skip to step 8.
 183    */
 184
 185   {
 186     size_t i;
 187     int inasciirange;
 188
 189     inasciirange = 1;
 190     for (i = 0; src[i]; i++)
 191       {
 192         if (src[i] > 0x7F)
 193           inasciirange = 0;
 194         /* copy string to output buffer if we are about to skip to step8 */
 195         if (i < 64)
 196           out[i] = src[i];
 197       }
 198     if (i < 64)
 199       out[i] = '\0';
 200     if (inasciirange)
 201       goto step8;
 202   }
 203
 204   /*
 205    * 5. Verify that the sequence does NOT begin with the ACE prefix.
 206    *
 207    */
 208
 209   {
 210     size_t i;
 211     int match;
 212
 213     match = 1;
 214     for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
 215       if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
 216         match = 0;
 217     if (match)
 218       {
 219         free (src);
 220         return IDNA_CONTAINS_ACE_PREFIX;
 221       }
 222   }
 223
 224   /*
 225    * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
 226    * and fail if there is an error.
 227    */
 228   for (len = 0; src[len]; len++)
 229     ;
 230   src[len] = '\0';
 231   outlen = 63 - strlen (IDNA_ACE_PREFIX);
 232   rc = punycode_encode (len, src, NULL,
 233                         &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
 234   if (rc != PUNYCODE_SUCCESS)
 235     {
 236       free (src);
 237       return IDNA_PUNYCODE_ERROR;
 238     }
 239   out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
 240
 241   /*
 242    * 7. Prepend the ACE prefix.
 243    */
 244
 245   memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
 246
 247   /*
 248    * 8. Verify that the number of code points is in the range 1 to 63
 249    * inclusive (0 is excluded).
 250    */
 251
 252 step8:
 253   free (src);
 254   if (strlen (out) < 1 || strlen (out) > 63)
 255     return IDNA_INVALID_LENGTH;
 256
 257   return IDNA_SUCCESS;
 258 }
 259
 260 /* ToUnicode().  May realloc() utf8in. */
 261 static int
 262 idna_to_unicode_internal (char *utf8in,
 263                           uint32_t * out, size_t * outlen, int flags)
 264 {
 265   int rc;
 266   char tmpout[64];
 267   size_t utf8len = strlen (utf8in) + 1;
 268   size_t addlen = 0;
 269
 270   /*
 271    * ToUnicode consists of the following steps:
 272    *
 273    * 1. If the sequence contains any code points outside the ASCII range
 274    * (0..7F) then proceed to step 2, otherwise skip to step 3.
 275    */
 276
 277   {
 278     size_t i;
 279     int inasciirange;
 280
 281     inasciirange = 1;
 282     for (i = 0; utf8in[i]; i++)
 283       if (utf8in[i] & ~0x7F)
 284         inasciirange = 0;
 285     if (inasciirange)
 286       goto step3;
 287   }
 288
 289   /*
 290    * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
 291    * error. (If step 3 of ToASCII is also performed here, it will not
 292    * affect the overall behavior of ToUnicode, but it is not
 293    * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
 294    */
 295   do
 296     {
 297       char *newp = realloc (utf8in, utf8len + addlen);
 298       if (newp == NULL)
 299         {
 300           free (utf8in);
 301           return IDNA_MALLOC_ERROR;
 302         }
 303       utf8in = newp;
 304       if (flags & IDNA_ALLOW_UNASSIGNED)
 305         rc = stringprep_nameprep (utf8in, utf8len + addlen);
 306       else
 307         rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
 308       addlen += 1;
 309     }
 310   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
 311
 312   if (rc != STRINGPREP_OK)
 313     {
 314       free (utf8in);
 315       return IDNA_STRINGPREP_ERROR;
 316     }
 317
 318   /* 3. Verify that the sequence begins with the ACE prefix, and save a
 319    * copy of the sequence.
 320    */
 321
 322 step3:
 323   if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
 324     {
 325       free (utf8in);
 326       return IDNA_NO_ACE_PREFIX;
 327     }
 328
 329   /* 4. Remove the ACE prefix.
 330    */
 331
 332   memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
 333            strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
 334
 335   /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
 336    * and fail if there is an error. Save a copy of the result of
 337    * this step.
 338    */
 339
 340   (*outlen)--;                  /* reserve one for the zero */
 341
 342   rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
 343   if (rc != PUNYCODE_SUCCESS)
 344     {
 345       free (utf8in);
 346       return IDNA_PUNYCODE_ERROR;
 347     }
 348
 349   out[*outlen] = 0;             /* add zero */
 350
 351   /* 6. Apply ToASCII.
 352    */
 353
 354   rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
 355   if (rc != IDNA_SUCCESS)
 356     {
 357       free (utf8in);
 358       return rc;
 359     }
 360
 361   /* 7. Verify that the result of step 6 matches the saved copy from
 362    * step 3, using a case-insensitive ASCII comparison.
 363    */
 364
 365   if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
 366     {
 367       free (utf8in);
 368       return IDNA_ROUNDTRIP_VERIFY_ERROR;
 369     }
 370
 371   /* 8. Return the saved copy from step 5.
 372    */
 373
 374   free (utf8in);
 375   return IDNA_SUCCESS;
 376 }
 377
 378 /**
 379  * idna_to_unicode_44i
 380  * @in: input array with unicode code points.
 381  * @inlen: length of input array with unicode code points.
 382  * @out: output array with unicode code points.
 383  * @outlen: on input, maximum size of output array with unicode code points,
 384  *          on exit, actual size of output array with unicode code points.
 385  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 386  *
 387  * The ToUnicode operation takes a sequence of Unicode code points
 388  * that make up one label and returns a sequence of Unicode code
 389  * points. If the input sequence is a label in ACE form, then the
 390  * result is an equivalent internationalized label that is not in ACE
 391  * form, otherwise the original sequence is returned unaltered.
 392  *
 393  * ToUnicode never fails. If any step fails, then the original input
 394  * sequence is returned immediately in that step.
 395  *
 396  * The Punycode decoder can never output more code points than it
 397  * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
 398  * the number of octets needed to represent a sequence of code points
 399  * depends on the particular character encoding used.
 400  *
 401  * The inputs to ToUnicode are a sequence of code points, the
 402  * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
 403  * ToUnicode is always a sequence of Unicode code points.
 404  *
 405  * Return value: Returns error condition, but it must only be used for
 406  *               debugging purposes.  The output buffer is always
 407  *               guaranteed to contain the correct data according to
 408  *               the specification (sans malloc induced errors).  NB!
 409  *               This means that you normally ignore the return code
 410  *               from this function, as checking it means breaking the
 411  *               standard.
 412  */
 413 int
 414 idna_to_unicode_44i (const uint32_t * in, size_t inlen,
 415                      uint32_t * out, size_t * outlen, int flags)
 416 {
 417   int rc;
 418   size_t outlensave = *outlen;
 419   char *p;
 420
 421   p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
 422   if (p == NULL)
 423     return IDNA_MALLOC_ERROR;
 424
 425   rc = idna_to_unicode_internal (p, out, outlen, flags);
 426   if (rc != IDNA_SUCCESS)
 427     {
 428       memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
 429                                          inlen : outlensave));
 430       *outlen = inlen;
 431     }
 432
 433   /* p is freed in idna_to_unicode_internal.  */
 434
 435   return rc;
 436 }
 437
 438 /* Wrappers that handle several labels */
 439
 440 /**
 441  * idna_to_ascii_4z:
 442  * @input: zero terminated input Unicode string.
 443  * @output: pointer to newly allocated output string.
 444  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 445  *
 446  * Convert UCS-4 domain name to ASCII string.  The domain name may
 447  * contain several labels, separated by dots.  The output buffer must
 448  * be deallocated by the caller.
 449  *
 450  * Return value: Returns IDNA_SUCCESS on success, or error code.
 451  **/
 452 int
 453 idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
 454 {
 455   const uint32_t *start = input;
 456   const uint32_t *end = input;
 457   char buf[64];
 458   char *out = NULL;
 459   int rc;
 460
 461   /* 1) Whenever dots are used as label separators, the following
 462      characters MUST be recognized as dots: U+002E (full stop),
 463      U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
 464      U+FF61 (halfwidth ideographic full stop). */
 465
 466   if (input[0] == 0)
 467     {
 468       /* Handle implicit zero-length root label. */
 469       *output = malloc (1);
 470       if (!*output)
 471         return IDNA_MALLOC_ERROR;
 472       strcpy (*output, "");
 473       return IDNA_SUCCESS;
 474     }
 475
 476   if (DOTP (input[0]) && input[1] == 0)
 477     {
 478       /* Handle explicit zero-length root label. */
 479       *output = malloc (2);
 480       if (!*output)
 481         return IDNA_MALLOC_ERROR;
 482       strcpy (*output, ".");
 483       return IDNA_SUCCESS;
 484     }
 485
 486   *output = NULL;
 487   do
 488     {
 489       end = start;
 490
 491       for (; *end && !DOTP (*end); end++)
 492         ;
 493
 494       if (*end == '\0' && start == end)
 495         {
 496           /* Handle explicit zero-length root label. */
 497           buf[0] = '\0';
 498         }
 499       else
 500         {
 501           rc = idna_to_ascii_4i (start, end - start, buf, flags);
 502           if (rc != IDNA_SUCCESS)
 503             return rc;
 504         }
 505
 506       if (out)
 507         {
 508           char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
 509           if (!newp)
 510             {
 511               free (out);
 512               return IDNA_MALLOC_ERROR;
 513             }
 514           out = newp;
 515           strcat (out, ".");
 516           strcat (out, buf);
 517         }
 518       else
 519         {
 520           out = (char *) malloc (strlen (buf) + 1);
 521           if (!out)
 522             return IDNA_MALLOC_ERROR;
 523           strcpy (out, buf);
 524         }
 525
 526       start = end + 1;
 527     }
 528   while (*end);
 529
 530   *output = out;
 531
 532   return IDNA_SUCCESS;
 533 }
 534
 535 /**
 536  * idna_to_ascii_8z:
 537  * @input: zero terminated input UTF-8 string.
 538  * @output: pointer to newly allocated output string.
 539  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 540  *
 541  * Convert UTF-8 domain name to ASCII string.  The domain name may
 542  * contain several labels, separated by dots.  The output buffer must
 543  * be deallocated by the caller.
 544  *
 545  * Return value: Returns IDNA_SUCCESS on success, or error code.
 546  **/
 547 int
 548 idna_to_ascii_8z (const char *input, char **output, int flags)
 549 {
 550   uint32_t *ucs4;
 551   size_t ucs4len;
 552   int rc;
 553
 554   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
 555   if (!ucs4)
 556     return IDNA_ICONV_ERROR;
 557
 558   rc = idna_to_ascii_4z (ucs4, output, flags);
 559
 560   free (ucs4);
 561
 562   return rc;
 563
 564 }
 565
 566 /**
 567  * idna_to_ascii_lz:
 568  * @input: zero terminated input UTF-8 string.
 569  * @output: pointer to newly allocated output string.
 570  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 571  *
 572  * Convert domain name in the locale's encoding to ASCII string.  The
 573  * domain name may contain several labels, separated by dots.  The
 574  * output buffer must be deallocated by the caller.
 575  *
 576  * Return value: Returns IDNA_SUCCESS on success, or error code.
 577  **/
 578 int
 579 idna_to_ascii_lz (const char *input, char **output, int flags)
 580 {
 581   char *utf8;
 582   int rc;
 583
 584   utf8 = stringprep_locale_to_utf8 (input);
 585   if (!utf8)
 586     return IDNA_ICONV_ERROR;
 587
 588   rc = idna_to_ascii_8z (utf8, output, flags);
 589
 590   free (utf8);
 591
 592   return rc;
 593 }
 594
 595 /**
 596  * idna_to_unicode_4z4z:
 597  * @input: zero-terminated Unicode string.
 598  * @output: pointer to newly allocated output Unicode string.
 599  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 600  *
 601  * Convert possibly ACE encoded domain name in UCS-4 format into a
 602  * UCS-4 string.  The domain name may contain several labels,
 603  * separated by dots.  The output buffer must be deallocated by the
 604  * caller.
 605  *
 606  * Return value: Returns IDNA_SUCCESS on success, or error code.
 607  **/
 608 int
 609 idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
 610 {
 611   const uint32_t *start = input;
 612   const uint32_t *end = input;
 613   uint32_t *buf;
 614   size_t buflen;
 615   uint32_t *out = NULL;
 616   size_t outlen = 0;
 617   int rc;
 618
 619   *output = NULL;
 620
 621   do
 622     {
 623       end = start;
 624
 625       for (; *end && !DOTP (*end); end++)
 626         ;
 627
 628       buflen = end - start;
 629       buf = malloc (sizeof (buf[0]) * (buflen + 1));
 630       if (!buf)
 631         return IDNA_MALLOC_ERROR;
 632
 633       rc = idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
 634       /* don't check rc as per specification! */
 635
 636       if (out)
 637         {
 638           uint32_t *newp = realloc (out,
 639                                     sizeof (out[0])
 640                                     * (outlen + 1 + buflen + 1));
 641           if (!newp)
 642             {
 643               free (buf);
 644               free (out);
 645               return IDNA_MALLOC_ERROR;
 646             }
 647           out = newp;
 648           out[outlen++] = 0x002E;       /* '.' (full stop) */
 649           memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
 650           outlen += buflen;
 651           out[outlen] = 0x0;
 652           free (buf);
 653         }
 654       else
 655         {
 656           out = buf;
 657           outlen = buflen;
 658           out[outlen] = 0x0;
 659         }
 660
 661       start = end + 1;
 662     }
 663   while (*end);
 664
 665   *output = out;
 666
 667   return IDNA_SUCCESS;
 668 }
 669
 670 /**
 671  * idna_to_unicode_8z4z:
 672  * @input: zero-terminated UTF-8 string.
 673  * @output: pointer to newly allocated output Unicode string.
 674  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 675  *
 676  * Convert possibly ACE encoded domain name in UTF-8 format into a
 677  * UCS-4 string.  The domain name may contain several labels,
 678  * separated by dots.  The output buffer must be deallocated by the
 679  * caller.
 680  *
 681  * Return value: Returns IDNA_SUCCESS on success, or error code.
 682  **/
 683 int
 684 idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
 685 {
 686   uint32_t *ucs4;
 687   size_t ucs4len;
 688   int rc;
 689
 690   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
 691   if (!ucs4)
 692     return IDNA_ICONV_ERROR;
 693
 694   rc = idna_to_unicode_4z4z (ucs4, output, flags);
 695   free (ucs4);
 696
 697   return rc;
 698 }
 699
 700 /**
 701  * idna_to_unicode_8z8z:
 702  * @input: zero-terminated UTF-8 string.
 703  * @output: pointer to newly allocated output UTF-8 string.
 704  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 705  *
 706  * Convert possibly ACE encoded domain name in UTF-8 format into a
 707  * UTF-8 string.  The domain name may contain several labels,
 708  * separated by dots.  The output buffer must be deallocated by the
 709  * caller.
 710  *
 711  * Return value: Returns IDNA_SUCCESS on success, or error code.
 712  **/
 713 int
 714 idna_to_unicode_8z8z (const char *input, char **output, int flags)
 715 {
 716   uint32_t *ucs4;
 717   int rc;
 718
 719   rc = idna_to_unicode_8z4z (input, &ucs4, flags);
 720   *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
 721   free (ucs4);
 722
 723   if (!*output)
 724     return IDNA_ICONV_ERROR;
 725
 726   return rc;
 727 }
 728
 729 /**
 730  * idna_to_unicode_8zlz:
 731  * @input: zero-terminated UTF-8 string.
 732  * @output: pointer to newly allocated output string encoded in the
 733  *   current locale's character set.
 734  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 735  *
 736  * Convert possibly ACE encoded domain name in UTF-8 format into a
 737  * string encoded in the current locale's character set.  The domain
 738  * name may contain several labels, separated by dots.  The output
 739  * buffer must be deallocated by the caller.
 740  *
 741  * Return value: Returns IDNA_SUCCESS on success, or error code.
 742  **/
 743 int
 744 idna_to_unicode_8zlz (const char *input, char **output, int flags)
 745 {
 746   char *utf8;
 747   int rc;
 748
 749   rc = idna_to_unicode_8z8z (input, &utf8, flags);
 750   *output = stringprep_utf8_to_locale (utf8);
 751   free (utf8);
 752
 753   if (!*output)
 754     return IDNA_ICONV_ERROR;
 755
 756   return rc;
 757 }
 758
 759 /**
 760  * idna_to_unicode_lzlz:
 761  * @input: zero-terminated string encoded in the current locale's
 762  *   character set.
 763  * @output: pointer to newly allocated output string encoded in the
 764  *   current locale's character set.
 765  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 766  *
 767  * Convert possibly ACE encoded domain name in the locale's character
 768  * set into a string encoded in the current locale's character set.
 769  * The domain name may contain several labels, separated by dots.  The
 770  * output buffer must be deallocated by the caller.
 771  *
 772  * Return value: Returns IDNA_SUCCESS on success, or error code.
 773  **/
 774 int
 775 idna_to_unicode_lzlz (const char *input, char **output, int flags)
 776 {
 777   char *utf8;
 778   int rc;
 779
 780   utf8 = stringprep_locale_to_utf8 (input);
 781   if (!utf8)
 782     return IDNA_ICONV_ERROR;
 783
 784   rc = idna_to_unicode_8zlz (utf8, output, flags);
 785   free (utf8);
 786
 787   return rc;
 788 }
 789
 790 /**
 791  * IDNA_ACE_PREFIX
 792  *
 793  * The IANA allocated prefix to use for IDNA. "xn--"
 794  */
 795
 796 /**
 797  * Idna_rc:
 798  * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
 799  *   always be zero, the remaining ones are only guaranteed to hold
 800  *   non-zero values, for logical comparison purposes.
 801  * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
 802  * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
 803  * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
 804  *   the string contains non-LDH ASCII characters.
 805  * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
 806  *   the string contains a leading or trailing hyphen-minus (U+002D).
 807  * @IDNA_INVALID_LENGTH: The final output string is not within the
 808  *   (inclusive) range 1 to 63 characters.
 809  * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
 810  *   (for ToUnicode).
 811  * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
 812  *   string does not equal the input.
 813  * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
 814  *   ToASCII).
 815  * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
 816  * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
 817  *   fatal error).
 818  * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
 819  *   internally in libc).
 820  *
 821  * Enumerated return codes of idna_to_ascii_4i(),
 822  * idna_to_unicode_44i() functions (and functions derived from those
 823  * functions).  The value 0 is guaranteed to always correspond to
 824  * success.
 825  */
 826
 827
 828 /**
 829  * Idna_flags:
 830  * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
 831  *   Unicode code points.
 832  * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
 833  *   rules (i.e., normal host name rules).
 834  *
 835  * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
 836  */