chiark - git - mdw - termux-packages/blob - packages/hunspell/hunspell-chenc.patch

   1 See https://github.com/hunspell/hunspell/pull/521
   2
   3 Fix chenc and its usages
   4 chenc was modifying the buffer passed to it. This caused hard-to-track
   5 repeated conversions of token and possibly other variables.
   6
   7 Change it to only return the converted string.
   8
   9 Additionally, the logic extending the dest buffer implicitly assumed that 0
  10 bytes were left in the dest buffer. That is not necessarily the case when
  11 converting to UTF-8: if the result is a 2-byte sequence and only 1 byte
  12 remains, iconv reports E2BIG as well.
  13
  14 This fixes the pipe mode (-a) case in UTF-8 with a KOI8-R dictionary.
  15
  16 diff -ur hunspell-1.6.1/src/tools/hunspell.cxx hunspell-1.6.1-mod2/src/tools/hunspell.cxx
  17 --- hunspell-1.6.1/src/tools/hunspell.cxx       2017-03-25 22:20:45.000000000 +0100
  18 +++ hunspell-1.6.1-mod2/src/tools/hunspell.cxx  2017-08-14 23:22:16.246966174 +0200
  19 @@ -243,7 +243,7 @@
  20  #endif
  21
  22  /* change character encoding */
  23 -std::string& chenc(std::string& st, const char* enc1, const char* enc2) {
  24 +std::string chenc(const std::string& st, const char* enc1, const char* enc2) {
  25  #ifndef HAVE_ICONV
  26    (void)enc1;
  27    (void)enc2;
  28 @@ -258,7 +258,7 @@
  29    std::string out(st.size(), std::string::value_type());
  30    size_t c1(st.size());
  31    size_t c2(out.size());
  32 -  ICONV_CONST char* source = &st[0];
  33 +  ICONV_CONST char* source = (ICONV_CONST char*) &st[0];
  34    char* dest = &out[0];
  35    iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1));
  36    if (conv == (iconv_t)-1) {
  37 @@ -267,9 +267,10 @@
  38      size_t res;
  39      while ((res = iconv(conv, &source, &c1, &dest, &c2)) == size_t(-1)) {
  40        if (errno == E2BIG) {
  41 +        ssize_t destoff = dest - const_cast<char*>(&out[0]);
  42          out.resize(out.size() + (c2 += c1));
  43
  44 -        dest = const_cast<char*>(&out[0]) + out.size() - c2;
  45 +        dest = const_cast<char*>(&out[0]) + destoff;
  46        } else
  47          break;
  48      }
  49 @@ -278,7 +279,7 @@
  50      }
  51      iconv_close(conv);
  52      out.resize(dest - &out[0]);
  53 -    st = out;
  54 +    return out;
  55    }
  56
  57    return st;
  58 @@ -507,8 +508,7 @@
  59  #endif
  60
  61  int putdic(const std::string& in_word, Hunspell* pMS) {
  62 -  std::string word(in_word);
  63 -  chenc(word, ui_enc, dic_enc[0]);
  64 +  std::string word = chenc(in_word, ui_enc, dic_enc[0]);
  65
  66    std::string buf;
  67    pMS->input_conv(word.c_str(), buf);
  68 @@ -565,7 +565,7 @@
  69    if (!dic)
  70      return 0;
  71    for (size_t i = 0; i < w.size(); ++i) {
  72 -    chenc(w[i], io_enc, ui_enc);
  73 +    w[i] = chenc(w[i], io_enc, ui_enc);
  74      fprintf(dic, "%s\n", w[i].c_str());
  75    }
  76    fclose(dic);
  77 @@ -595,8 +595,7 @@
  78  // check words in the dictionaries (and set first checked dictionary)
  79  bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) {
  80    for (int i = 0; i < dmax; ++i) {
  81 -    std::string buf(token);
  82 -    chenc(buf, io_enc, dic_enc[*d]);
  83 +    std::string buf = chenc(token, io_enc, dic_enc[*d]);
  84      mystrrep(buf, ENTITY_APOS, "'");
  85      if (checkapos && buf.find('\'') != std::string::npos)
  86        return false;
  87 @@ -937,7 +936,7 @@
  88                  fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str());
  89                }
  90                for (size_t j = 1; j < wlst.size(); ++j) {
  91 -                fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
  92 +                  fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
  93                }
  94                fprintf(stdout, "\n");
  95                fflush(stdout);
  96 @@ -1194,8 +1193,7 @@
  97  }
  98
  99  std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
 100 -  std::string utf8str(token);
 101 -  chenc(utf8str, ioenc, "UTF-8");
 102 +  std::string utf8str = chenc(token, ioenc, "UTF-8");
 103    std::vector<w_char> u;
 104    u8_u16(u, utf8str);
 105    if (!u.empty()) {
 106 @@ -1206,8 +1204,7 @@
 107    }
 108    std::string scratch;
 109    u16_u8(scratch, u);
 110 -  chenc(scratch, "UTF-8", ioenc);
 111 -  return scratch;
 112 +  return chenc(scratch, "UTF-8", ioenc);
 113  }
 114
 115  // for terminal interface
 116 @@ -1532,13 +1529,13 @@
 117        std::vector<std::string> wlst;
 118        dialogscreen(parser, token, filename, info, wlst);  // preview
 119        refresh();
 120 -      std::string buf(token);
 121 -      wlst = pMS[d]->suggest(mystrrep(chenc(buf, io_enc, dic_enc[d]), ENTITY_APOS, "'").c_str());
 122 +      std::string dicbuf = chenc(token, io_enc, dic_enc[d]);
 123 +      wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str());
 124        if (wlst.empty()) {
 125          dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
 126        } else {
 127          for (size_t j = 0; j < wlst.size(); ++j) {
 128 -          chenc(wlst[j], dic_enc[d], io_enc);
 129 +          wlst[j] = chenc(wlst[j], dic_enc[d], io_enc);
 130          }
 131          dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
 132        }