common/utf8conv.c

   1 /* utf8conv.c - UTF-8 character set conversion
   2  * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
   3  *               2008, 2010  Free Software Foundation, Inc.
   4  *
   5  * This file is part of GnuPG.
   6  *
   7  * GnuPG is free software; you can redistribute it and/or modify it
   8  * under the terms of either
   9  *
  10  *   - the GNU Lesser General Public License as published by the Free
  11  *     Software Foundation; either version 3 of the License, or (at
  12  *     your option) any later version.
  13  *
  14  * or
  15  *
  16  *   - the GNU General Public License as published by the Free
  17  *     Software Foundation; either version 2 of the License, or (at
  18  *     your option) any later version.
  19  *
  20  * or both in parallel, as here.
  21  *
  22  * GnuPG is distributed in the hope that it will be useful, but
  23  * WITHOUT ANY WARRANTY; without even the implied warranty of
  24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25  * General Public License for more details.
  26  *
  27  * You should have received a copies of the GNU General Public License
  28  * and the GNU Lesser General Public License along with this program;
  29  * if not, see <https://www.gnu.org/licenses/>.
  30  */
  31
  32 #include <config.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35 #include <stdarg.h>
  36 #include <ctype.h>
  37 #ifdef HAVE_LANGINFO_CODESET
  38 #include <langinfo.h>
  39 #endif
  40 #include <errno.h>
  41
  42 #if HAVE_W32_SYSTEM
  43 # /* Tell libgpg-error to provide the iconv macros.  */
  44 # define GPGRT_ENABLE_W32_ICONV_MACROS 1
  45 #elif HAVE_ANDROID_SYSTEM
  46 # /* No iconv support.  */
  47 #else
  48 # include <iconv.h>
  49 #endif
  50
  51
  52 #include "util.h"
  53 #include "common-defs.h"
  54 #include "i18n.h"
  55 #include "stringhelp.h"
  56 #include "utf8conv.h"
  57
  58 #ifndef MB_LEN_MAX
  59 #define MB_LEN_MAX 16
  60 #endif
  61
     /* Conversion state of this file.  It is selected by
        set_native_charset and may be changed by the fallback code of
        handle_iconv_error.  */
  62 static const char *active_charset_name = "iso-8859-1";
  63 static int no_translation;     /* True if strings are passed through unconverted (native charset is UTF-8). */
  64 static int use_iconv;          /* True if conversions have to go through iconv. */
  65
  66
  67 #ifdef HAVE_ANDROID_SYSTEM
  68 /* Fake stuff to get things building.  */
  69 typedef void *iconv_t;
  70 #define ICONV_CONST
  71
  72 static iconv_t
  73 iconv_open (const char *tocode, const char *fromcode)
  74 {
  75   (void)tocode;
  76   (void)fromcode;
  77   return (iconv_t)(-1);
  78 }
  79
  80 static size_t
  81 iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
  82        char **outbuf, size_t *outbytesleft)
  83 {
  84   (void)cd;
  85   (void)inbuf;
  86   (void)inbytesleft;
  87   (void)outbuf;
  88   (void)outbytesleft;
  89   return (size_t)(0);
  90 }
  91
  92 static int
  93 iconv_close (iconv_t cd)
  94 {
  95   (void)cd;
  96   return 0;
  97 }
  98 #endif /*HAVE_ANDROID_SYSTEM*/
  99
 100
 101 /* Error handler for iconv failures. This is needed to not clutter the
 102    output with repeated diagnostics about a missing conversion. */
 103 static void
 104 handle_iconv_error (const char *to, const char *from, int use_fallback)
 105 {
 106   if (errno == EINVAL)
 107     {
 108       static int shown1, shown2;
 109       int x;
 110
 111       if (to && !strcmp (to, "utf-8"))
 112         {
 113           x = shown1;
 114           shown1 = 1;
 115         }
 116       else
 117         {
 118           x = shown2;
 119           shown2 = 1;
 120         }
 121
 122       if (!x)
 123         log_info (_("conversion from '%s' to '%s' not available\n"),
 124                   from, to);
 125     }
 126   else
 127     {
 128       static int shown;
 129
 130       if (!shown)
 131         log_info (_("iconv_open failed: %s\n"), strerror (errno));
 132       shown = 1;
 133     }
 134
 135   if (use_fallback)
 136     {
 137       /* To avoid further error messages we fallback to UTF-8 for the
 138          native encoding.  Nowadays this seems to be the best bet in
 139          case of errors from iconv or nl_langinfo.  */
 140       active_charset_name = "utf-8";
 141       no_translation = 0;
 142       use_iconv = 0;
 143     }
 144 }
 145
 146
 147
 148 int
 149 set_native_charset (const char *newset)
 150 {
 151   const char *full_newset;
 152
 153   if (!newset)
 154     {
 155 #ifdef HAVE_ANDROID_SYSTEM
 156       newset = "utf-8";
 157 #elif defined HAVE_W32_SYSTEM
 158       static char codepage[30];
 159       unsigned int cpno;
 160       const char *aliases;
 161
 162       /* We are a console program thus we need to use the
 163          GetConsoleOutputCP function and not the the GetACP which
 164          would give the codepage for a GUI program.  Note this is not
 165          a bulletproof detection because GetConsoleCP might return a
 166          different one for console input.  Not sure how to cope with
 167          that.  If the console Code page is not known we fall back to
 168          the system code page.  */
 169 #ifndef HAVE_W32CE_SYSTEM
 170       cpno = GetConsoleOutputCP ();
 171       if (!cpno)
 172 #endif
 173         cpno = GetACP ();
 174       sprintf (codepage, "CP%u", cpno );
 175       /* Resolve alias.  We use a long string string and not the usual
 176          array to optimize if the code is taken to a DSO.  Taken from
 177          libiconv 1.9.2. */
 178       newset = codepage;
 179       for (aliases = ("CP936"   "\0" "GBK" "\0"
 180                       "CP1361"  "\0" "JOHAB" "\0"
 181                       "CP20127" "\0" "ASCII" "\0"
 182                       "CP20866" "\0" "KOI8-R" "\0"
 183                       "CP21866" "\0" "KOI8-RU" "\0"
 184                       "CP28591" "\0" "ISO-8859-1" "\0"
 185                       "CP28592" "\0" "ISO-8859-2" "\0"
 186                       "CP28593" "\0" "ISO-8859-3" "\0"
 187                       "CP28594" "\0" "ISO-8859-4" "\0"
 188                       "CP28595" "\0" "ISO-8859-5" "\0"
 189                       "CP28596" "\0" "ISO-8859-6" "\0"
 190                       "CP28597" "\0" "ISO-8859-7" "\0"
 191                       "CP28598" "\0" "ISO-8859-8" "\0"
 192                       "CP28599" "\0" "ISO-8859-9" "\0"
 193                       "CP28605" "\0" "ISO-8859-15" "\0"
 194                       "CP65001" "\0" "UTF-8" "\0");
 195            *aliases;
 196            aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
 197         {
 198           if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
 199             {
 200               newset = aliases + strlen (aliases) + 1;
 201               break;
 202             }
 203         }
 204
 205 #else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
 206
 207 #ifdef HAVE_LANGINFO_CODESET
 208       newset = nl_langinfo (CODESET);
 209 #else /*!HAVE_LANGINFO_CODESET*/
 210       /* Try to get the used charset from environment variables.  */
 211       static char codepage[30];
 212       const char *lc, *dot, *mod;
 213
 214       strcpy (codepage, "iso-8859-1");
 215       lc = getenv ("LC_ALL");
 216       if (!lc || !*lc)
 217         {
 218           lc = getenv ("LC_CTYPE");
 219           if (!lc || !*lc)
 220             lc = getenv ("LANG");
 221         }
 222       if (lc && *lc)
 223         {
 224           dot = strchr (lc, '.');
 225           if (dot)
 226             {
 227               mod = strchr (++dot, '@');
 228               if (!mod)
 229                 mod = dot + strlen (dot);
 230               if (mod - dot < sizeof codepage && dot != mod)
 231                 {
 232                   memcpy (codepage, dot, mod - dot);
 233                   codepage [mod - dot] = 0;
 234                 }
 235             }
 236         }
 237       newset = codepage;
 238 #endif /*!HAVE_LANGINFO_CODESET*/
 239 #endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
 240     }
 241
 242   full_newset = newset;
 243   if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
 244     {
 245       newset += 3;
 246       if (*newset == '-' || *newset == '_')
 247         newset++;
 248     }
 249
 250   /* Note that we silently assume that plain ASCII is actually meant
 251      as Latin-1.  This makes sense because many Unix system don't have
 252      their locale set up properly and thus would get annoying error
 253      messages and we have to handle all the "bug" reports. Latin-1 has
 254      traditionally been the character set used for 8 bit characters on
 255      Unix systems. */
 256   if ( !*newset
 257        || !ascii_strcasecmp (newset, "8859-1" )
 258        || !ascii_strcasecmp (newset, "646" )
 259        || !ascii_strcasecmp (newset, "ASCII" )
 260        || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
 261        )
 262     {
 263       active_charset_name = "iso-8859-1";
 264       no_translation = 0;
 265       use_iconv = 0;
 266     }
 267   else if ( !ascii_strcasecmp (newset, "utf8" )
 268             || !ascii_strcasecmp(newset, "utf-8") )
 269     {
 270       active_charset_name = "utf-8";
 271       no_translation = 1;
 272       use_iconv = 0;
 273     }
 274   else
 275     {
 276       iconv_t cd;
 277
 278       cd = iconv_open (full_newset, "utf-8");
 279       if (cd == (iconv_t)-1)
 280         {
 281           handle_iconv_error (full_newset, "utf-8", 0);
 282           return -1;
 283         }
 284       iconv_close (cd);
 285       cd = iconv_open ("utf-8", full_newset);
 286       if (cd == (iconv_t)-1)
 287         {
 288           handle_iconv_error ("utf-8", full_newset, 0);
 289           return -1;
 290         }
 291       iconv_close (cd);
 292       active_charset_name = full_newset;
 293       no_translation = 0;
 294       use_iconv = 1;
 295     }
 296   return 0;
 297 }
 298
 299 const char *
 300 get_native_charset ()
 301 {
 302   return active_charset_name;
 303 }
 304
 305 /* Return true if the native charset is utf-8.  */
 306 int
 307 is_native_utf8 (void)
 308 {
 309   return no_translation;
 310 }
 311
 312
 313 /* Convert string, which is in native encoding to UTF8 and return a
 314    new allocated UTF-8 string.  This function terminates the process
 315    on memory shortage.  */
 316 char *
 317 native_to_utf8 (const char *orig_string)
 318 {
 319   const unsigned char *string = (const unsigned char *)orig_string;
 320   const unsigned char *s;
 321   char *buffer;
 322   unsigned char *p;
 323   size_t length = 0;
 324
 325   if (no_translation)
 326     {
 327       /* Already utf-8 encoded. */
 328       buffer = xstrdup (orig_string);
 329     }
 330   else if (!use_iconv)
 331     {
 332       /* For Latin-1 we can avoid the iconv overhead. */
 333       for (s = string; *s; s++)
 334         {
 335           length++;
 336           if (*s & 0x80)
 337             length++;
 338         }
 339       buffer = xmalloc (length + 1);
 340       for (p = (unsigned char *)buffer, s = string; *s; s++)
 341         {
 342           if ( (*s & 0x80 ))
 343             {
 344               *p++ = 0xc0 | ((*s >> 6) & 3);
 345               *p++ = 0x80 | (*s & 0x3f);
 346             }
 347           else
 348             *p++ = *s;
 349         }
 350       *p = 0;
 351     }
 352   else
 353     {
 354       /* Need to use iconv.  */
 355       iconv_t cd;
 356       const char *inptr;
 357       char *outptr;
 358       size_t inbytes, outbytes;
 359
 360       cd = iconv_open ("utf-8", active_charset_name);
 361       if (cd == (iconv_t)-1)
 362         {
 363           handle_iconv_error ("utf-8", active_charset_name, 1);
 364           return native_to_utf8 (string);
 365         }
 366
 367       for (s=string; *s; s++ )
 368         {
 369           length++;
 370           if ((*s & 0x80))
 371             length += 5; /* We may need up to 6 bytes for the utf8 output. */
 372         }
 373       buffer = xmalloc (length + 1);
 374
 375       inptr = string;
 376       inbytes = strlen (string);
 377       outptr = buffer;
 378       outbytes = length;
 379       if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
 380                   &outptr, &outbytes) == (size_t)-1)
 381         {
 382           static int shown;
 383
 384           if (!shown)
 385             log_info (_("conversion from '%s' to '%s' failed: %s\n"),
 386                       active_charset_name, "utf-8", strerror (errno));
 387           shown = 1;
 388           /* We don't do any conversion at all but use the strings as is. */
 389           strcpy (buffer, string);
 390         }
 391       else /* Success.  */
 392         {
 393           *outptr = 0;
 394           /* We could realloc the buffer now but I doubt that it makes
 395              much sense given that it will get freed anyway soon
 396              after.  */
 397         }
 398       iconv_close (cd);
 399     }
 400   return buffer;
 401 }
 402
 403
 404
 405 static char *
 406 do_utf8_to_native (const char *string, size_t length, int delim,
 407                    int with_iconv)
 408 {
 409   int nleft;
 410   int i;
 411   unsigned char encbuf[8];
 412   int encidx;
 413   const unsigned char *s;
 414   size_t n;
 415   char *buffer = NULL;
 416   char *p = NULL;
 417   unsigned long val = 0;
 418   size_t slen;
 419   int resync = 0;
 420
 421   /* First pass (p==NULL): count the extended utf-8 characters.  */
 422   /* Second pass (p!=NULL): create string.  */
 423   for (;;)
 424     {
 425       for (slen = length, nleft = encidx = 0, n = 0,
 426              s = (const unsigned char *)string;
 427            slen;
 428            s++, slen--)
 429         {
 430           if (resync)
 431             {
 432               if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
 433                 {
 434                   /* Still invalid. */
 435                   if (p)
 436                     {
 437                       sprintf (p, "\\x%02x", *s);
 438                       p += 4;
 439                     }
 440                   n += 4;
 441                   continue;
 442                 }
 443               resync = 0;
 444             }
 445           if (!nleft)
 446             {
 447               if (!(*s & 0x80))
 448                 {
 449                   /* Plain ascii. */
 450                   if ( delim != -1
 451                        && (*s < 0x20 || *s == 0x7f || *s == delim
 452                            || (delim && *s == '\\')))
 453                     {
 454                       n++;
 455                       if (p)
 456                         *p++ = '\\';
 457                       switch (*s)
 458                         {
 459                         case '\n': n++; if ( p ) *p++ = 'n'; break;
 460                         case '\r': n++; if ( p ) *p++ = 'r'; break;
 461                         case '\f': n++; if ( p ) *p++ = 'f'; break;
 462                         case '\v': n++; if ( p ) *p++ = 'v'; break;
 463                         case '\b': n++; if ( p ) *p++ = 'b'; break;
 464                         case    0: n++; if ( p ) *p++ = '0'; break;
 465                         default:
 466                           n += 3;
 467                           if (p)
 468                             {
 469                               sprintf (p, "x%02x", *s);
 470                               p += 3;
 471                             }
 472                           break;
 473                         }
 474                     }
 475                   else
 476                     {
 477                       if (p)
 478                         *p++ = *s;
 479                       n++;
 480                     }
 481                 }
 482               else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
 483                 {
 484                   val = *s & 0x1f;
 485                   nleft = 1;
 486                   encidx = 0;
 487                   encbuf[encidx++] = *s;
 488                 }
 489               else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
 490                 {
 491                   val = *s & 0x0f;
 492                   nleft = 2;
 493                   encidx = 0;
 494                   encbuf[encidx++] = *s;
 495                 }
 496               else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
 497                 {
 498                   val = *s & 0x07;
 499                   nleft = 3;
 500                   encidx = 0;
 501                   encbuf[encidx++] = *s;
 502                 }
 503               else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
 504                 {
 505                   val = *s & 0x03;
 506                   nleft = 4;
 507                   encidx = 0;
 508                   encbuf[encidx++] = *s;
 509                 }
 510               else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
 511                 {
 512                   val = *s & 0x01;
 513                   nleft = 5;
 514                   encidx = 0;
 515                   encbuf[encidx++] = *s;
 516                 }
 517               else /* Invalid encoding: print as \xNN. */
 518                 {
 519                   if (p)
 520                     {
 521                       sprintf (p, "\\x%02x", *s);
 522                       p += 4;
 523                     }
 524                   n += 4;
 525                   resync = 1;
 526                 }
 527             }
 528           else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
 529             {
 530               if (p)
 531                 {
 532                   for (i = 0; i < encidx; i++)
 533                     {
 534                       sprintf (p, "\\x%02x", encbuf[i]);
 535                       p += 4;
 536                     }
 537                   sprintf (p, "\\x%02x", *s);
 538                   p += 4;
 539                 }
 540               n += 4 + 4 * encidx;
 541               nleft = 0;
 542               encidx = 0;
 543               resync = 1;
 544             }
 545           else
 546             {
 547               encbuf[encidx++] = *s;
 548               val <<= 6;
 549               val |= *s & 0x3f;
 550               if (!--nleft)  /* Ready. */
 551                 {
 552                   if (no_translation)
 553                     {
 554                       if (p)
 555                         {
 556                           for (i = 0; i < encidx; i++)
 557                             *p++ = encbuf[i];
 558                         }
 559                       n += encidx;
 560                       encidx = 0;
 561                     }
 562                   else if (with_iconv)
 563                     {
 564                       /* Our strategy for using iconv is a bit strange
 565                          but it better keeps compatibility with
 566                          previous versions in regard to how invalid
 567                          encodings are displayed.  What we do is to
 568                          keep the utf-8 as is and have the real
 569                          translation step then at the end.  Yes, I
 570                          know that this is ugly.  However we are short
 571                          of the 1.4 release and for this branch we
 572                          should not mess too much around with iconv
 573                          things.  One reason for this is that we don't
 574                          know enough about non-GNU iconv
 575                          implementation and want to minimize the risk
 576                          of breaking the code on too many platforms.  */
 577                         if ( p )
 578                           {
 579                             for (i=0; i < encidx; i++ )
 580                               *p++ = encbuf[i];
 581                           }
 582                         n += encidx;
 583                         encidx = 0;
 584                     }
 585                   else  /* Latin-1 case. */
 586                     {
 587                       if (val >= 0x80 && val < 256)
 588                         {
 589                           /* We can simply print this character */
 590                           n++;
 591                           if (p)
 592                             *p++ = val;
 593                         }
 594                       else
 595                         {
 596                           /* We do not have a translation: print utf8. */
 597                           if (p)
 598                             {
 599                               for (i = 0; i < encidx; i++)
 600                                 {
 601                                   sprintf (p, "\\x%02x", encbuf[i]);
 602                                   p += 4;
 603                                 }
 604                             }
 605                           n += encidx * 4;
 606                           encidx = 0;
 607                         }
 608                     }
 609                 }
 610
 611             }
 612         }
 613       if (!buffer)
 614         {
 615           /* Allocate the buffer after the first pass. */
 616           buffer = p = xmalloc (n + 1);
 617         }
 618       else if (with_iconv)
 619         {
 620           /* Note: See above for comments.  */
 621           iconv_t cd;
 622           const char *inptr;
 623           char *outbuf, *outptr;
 624           size_t inbytes, outbytes;
 625
 626           *p = 0;  /* Terminate the buffer. */
 627
 628           cd = iconv_open (active_charset_name, "utf-8");
 629           if (cd == (iconv_t)-1)
 630             {
 631               handle_iconv_error (active_charset_name, "utf-8", 1);
 632               xfree (buffer);
 633               return utf8_to_native (string, length, delim);
 634             }
 635
 636           /* Allocate a new buffer large enough to hold all possible
 637              encodings. */
 638           n = p - buffer + 1;
 639           inbytes = n - 1;;
 640           inptr = buffer;
 641           outbytes = n * MB_LEN_MAX;
 642           if (outbytes / MB_LEN_MAX != n)
 643             BUG (); /* Actually an overflow. */
 644           outbuf = outptr = xmalloc (outbytes);
 645           if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
 646                       &outptr, &outbytes) == (size_t)-1)
 647             {
 648               static int shown;
 649
 650               if (!shown)
 651                 log_info (_("conversion from '%s' to '%s' failed: %s\n"),
 652                           "utf-8", active_charset_name, strerror (errno));
 653               shown = 1;
 654               /* Didn't worked out.  Try again but without iconv.  */
 655               xfree (buffer);
 656               buffer = NULL;
 657               xfree (outbuf);
 658               outbuf = do_utf8_to_native (string, length, delim, 0);
 659             }
 660             else /* Success.  */
 661               {
 662                 *outptr = 0; /* Make sure it is a string. */
 663                 /* We could realloc the buffer now but I doubt that it
 664                    makes much sense given that it will get freed
 665                    anyway soon after.  */
 666                 xfree (buffer);
 667               }
 668           iconv_close (cd);
 669           return outbuf;
 670         }
 671       else /* Not using iconv. */
 672         {
 673           *p = 0; /* Make sure it is a string. */
 674           return buffer;
 675         }
 676     }
 677 }
 678
 679 /* Convert string, which is in UTF-8 to native encoding.  Replace
 680    illegal encodings by some "\xnn" and quote all control
 681    characters. A character with value DELIM will always be quoted, it
 682    must be a vanilla ASCII character.  A DELIM value of -1 is special:
 683    it disables all quoting of control characters.  This function
 684    terminates the process on memory shortage.  */
 685 char *
 686 utf8_to_native (const char *string, size_t length, int delim)
 687 {
 688   return do_utf8_to_native (string, length, delim, use_iconv);
 689 }
 690
 691
 692
 693
 694 /* Wrapper function for iconv_open, required for W32 as we dlopen that
 695    library on that system.  */
 696 jnlib_iconv_t
 697 jnlib_iconv_open (const char *tocode, const char *fromcode)
 698 {
 699   return (jnlib_iconv_t)iconv_open (tocode, fromcode);
 700 }
 701
 702
 703 /* Wrapper function for iconv, required for W32 as we dlopen that
 704    library on that system.  */
 705 size_t
 706 jnlib_iconv (jnlib_iconv_t cd,
 707              const char **inbuf, size_t *inbytesleft,
 708              char **outbuf, size_t *outbytesleft)
 709 {
 710   return iconv ((iconv_t)cd, (ICONV_CONST char**)inbuf, inbytesleft,
 711                 outbuf, outbytesleft);
 712 }
 713
 714 /* Wrapper function for iconv_close, required for W32 as we dlopen that
 715    library on that system.  */
 716 int
 717 jnlib_iconv_close (jnlib_iconv_t cd)
 718 {
 719   return iconv_close ((iconv_t)cd);
 720 }
 721
 722
 723 #ifdef HAVE_W32_SYSTEM
 724 /* Return a malloced string encoded for CODEPAGE from the wide char input
 725    string STRING.  Caller must free this value.  Returns NULL and sets
 726    ERRNO on failure.  Calling this function with STRING set to NULL is
 727    not defined.  */
 728 static char *
 729 wchar_to_cp (const wchar_t *string, unsigned int codepage)
 730 {
 731   int n;
 732   char *result;
 733
 734   n = WideCharToMultiByte (codepage, 0, string, -1, NULL, 0, NULL, NULL);
 735   if (n < 0)
 736     {
 737       gpg_err_set_errno (EINVAL);
 738       return NULL;
 739     }
 740
 741   result = xtrymalloc (n+1);
 742   if (!result)
 743     return NULL;
 744
 745   n = WideCharToMultiByte (codepage, 0, string, -1, result, n, NULL, NULL);
 746   if (n < 0)
 747     {
 748       xfree (result);
 749       gpg_err_set_errno (EINVAL);
 750       result = NULL;
 751     }
 752   return result;
 753 }
 754
 755
 756 /* Return a malloced wide char string from a CODEPAGE encoded input
 757    string STRING.  Caller must free this value.  Returns NULL and sets
 758    ERRNO on failure.  Calling this function with STRING set to NULL is
 759    not defined.  */
 760 static wchar_t *
 761 cp_to_wchar (const char *string, unsigned int codepage)
 762 {
 763   int n;
 764   size_t nbytes;
 765   wchar_t *result;
 766
 767   n = MultiByteToWideChar (codepage, 0, string, -1, NULL, 0);
 768   if (n < 0)
 769     {
 770       gpg_err_set_errno (EINVAL);
 771       return NULL;
 772     }
 773
 774   nbytes = (size_t)(n+1) * sizeof(*result);
 775   if (nbytes / sizeof(*result) != (n+1))
 776     {
 777       gpg_err_set_errno (ENOMEM);
 778       return NULL;
 779     }
 780   result = xtrymalloc (nbytes);
 781   if (!result)
 782     return NULL;
 783
 784   n = MultiByteToWideChar (codepage, 0, string, -1, result, n);
 785   if (n < 0)
 786     {
 787       xfree (result);
 788       gpg_err_set_errno (EINVAL);
 789       result = NULL;
 790     }
 791   return result;
 792 }
 793
 794
 795 /* Return a malloced string encoded in the active code page from the
 796  * wide char input string STRING.  Caller must free this value.
 797  * Returns NULL and sets ERRNO on failure.  Calling this function with
 798  * STRING set to NULL is not defined.  */
 799 char *
 800 wchar_to_native (const wchar_t *string)
 801 {
 802   return wchar_to_cp (string, CP_ACP);
 803 }
 804
 805
 806 /* Return a malloced wide char string from an UTF-8 encoded input
 807  * string STRING.  Caller must free this value.  Returns NULL and sets
 808  * ERRNO on failure.  Calling this function with STRING set to NULL is
 809  * not defined.  */
 810 wchar_t *
 811 native_to_wchar (const char *string)
 812 {
 813   return cp_to_wchar (string, CP_ACP);
 814 }
 815
 816
 817 /* Return a malloced string encoded in UTF-8 from the wide char input
 818  * string STRING.  Caller must free this value.  Returns NULL and sets
 819  * ERRNO on failure.  Calling this function with STRING set to NULL is
 820  * not defined.  */
 821 char *
 822 wchar_to_utf8 (const wchar_t *string)
 823 {
 824   return wchar_to_cp (string, CP_UTF8);
 825 }
 826
 827
 828 /* Return a malloced wide char string from an UTF-8 encoded input
 829  * string STRING.  Caller must free this value.  Returns NULL and sets
 830  * ERRNO on failure.  Calling this function with STRING set to NULL is
 831  * not defined.  */
 832 wchar_t *
 833 utf8_to_wchar (const char *string)
 834 {
 835   return cp_to_wchar (string, CP_UTF8);
 836 }
 837
 838 #endif /*HAVE_W32_SYSTEM*/