From 2d3744e11966e5ae1c4edd79863d031fc0670523 Mon Sep 17 00:00:00 2001 From: Vladimir Serbinenko Date: Mon, 14 Aug 2017 23:49:06 +0200 Subject: [PATCH] hunspell: Fix Russian dictionarry in emacs. --- packages/hunspell/hunspell-chenc.patch | 130 +++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 packages/hunspell/hunspell-chenc.patch diff --git a/packages/hunspell/hunspell-chenc.patch b/packages/hunspell/hunspell-chenc.patch new file mode 100644 index 000000000..c5a56f0a0 --- /dev/null +++ b/packages/hunspell/hunspell-chenc.patch @@ -0,0 +1,130 @@ +Fix chenc and its usages +chenc was changing buffer it was passed to. This caused untrackable +multiple conversions of token and possibly other variables. + +Change it to only return converted string + +Additionally logic extending dest buffer implicitly assumed that 0 +bytes are left in dest buffer. It's not necessarily the case when +converting to UTF-8 as if result would be 2-byte sequence and only 1 byte +is remaining you get E2BIG as well. + +This fixes the case of pipe (-a) in UTF-8 with KOI8-R dictionary. + +diff -ur hunspell-1.6.1/src/tools/hunspell.cxx hunspell-1.6.1-mod2/src/tools/hunspell.cxx +--- hunspell-1.6.1/src/tools/hunspell.cxx 2017-03-25 22:20:45.000000000 +0100 ++++ hunspell-1.6.1-mod2/src/tools/hunspell.cxx 2017-08-14 23:22:16.246966174 +0200 +@@ -243,7 +243,7 @@ + #endif + + /* change character encoding */ +-std::string& chenc(std::string& st, const char* enc1, const char* enc2) { ++std::string chenc(const std::string& st, const char* enc1, const char* enc2) { + #ifndef HAVE_ICONV + (void)enc1; + (void)enc2; +@@ -258,7 +258,7 @@ + std::string out(st.size(), std::string::value_type()); + size_t c1(st.size()); + size_t c2(out.size()); +- ICONV_CONST char* source = &st[0]; ++ ICONV_CONST char* source = (ICONV_CONST char*) &st[0]; + char* dest = &out[0]; + iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1)); + if (conv == (iconv_t)-1) { +@@ -267,9 +267,10 @@ + size_t res; + while ((res = iconv(conv, &source, &c1, &dest, &c2)) == size_t(-1)) { + if (errno == E2BIG) { ++ ssize_t destoff = dest - const_cast(&out[0]); + out.resize(out.size() + (c2 += c1)); + +- dest = const_cast(&out[0]) + out.size() - c2; ++ dest = const_cast(&out[0]) + destoff; + } else + break; + } +@@ -278,7 +279,7 @@ + } + iconv_close(conv); + out.resize(dest - &out[0]); +- st = out; ++ return out; + } + + return st; +@@ -507,8 +508,7 @@ + #endif + + int putdic(const std::string& in_word, Hunspell* pMS) { +- std::string word(in_word); +- chenc(word, ui_enc, dic_enc[0]); ++ std::string word = chenc(in_word, ui_enc, dic_enc[0]); + + std::string buf; + pMS->input_conv(word.c_str(), buf); +@@ -565,7 +565,7 @@ + if (!dic) + return 0; + for (size_t i = 0; i < w.size(); ++i) { +- chenc(w[i], io_enc, ui_enc); ++ w[i] = chenc(w[i], io_enc, ui_enc); + fprintf(dic, "%s\n", w[i].c_str()); + } + fclose(dic); +@@ -595,8 +595,7 @@ + // check words in the dictionaries (and set first checked dictionary) + bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) { + for (int i = 0; i < dmax; ++i) { +- std::string buf(token); +- chenc(buf, io_enc, dic_enc[*d]); ++ std::string buf = chenc(token, io_enc, dic_enc[*d]); + mystrrep(buf, ENTITY_APOS, "'"); + if (checkapos && buf.find('\'') != std::string::npos) + return false; +@@ -937,7 +936,7 @@ + fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str()); + } + for (size_t j = 1; j < wlst.size(); ++j) { +- fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str()); ++ fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str()); + } + fprintf(stdout, "\n"); + fflush(stdout); +@@ -1194,8 +1193,7 @@ + } + + std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) { +- std::string utf8str(token); +- chenc(utf8str, ioenc, "UTF-8"); ++ std::string utf8str = chenc(token, ioenc, "UTF-8"); + std::vector u; + u8_u16(u, utf8str); + if (!u.empty()) { +@@ -1206,8 +1204,7 @@ + } + std::string scratch; + u16_u8(scratch, u); +- chenc(scratch, "UTF-8", ioenc); +- return scratch; ++ return chenc(scratch, "UTF-8", ioenc); + } + + // for terminal interface +@@ -1532,13 +1529,13 @@ + std::vector wlst; + dialogscreen(parser, token, filename, info, wlst); // preview + refresh(); +- std::string buf(token); +- wlst = pMS[d]->suggest(mystrrep(chenc(buf, io_enc, dic_enc[d]), ENTITY_APOS, "'").c_str()); ++ std::string dicbuf = chenc(token, io_enc, dic_enc[d]); ++ wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str()); + if (wlst.empty()) { + dialogexit = dialog(parser, pMS[d], token, filename, wlst, info); + } else { + for (size_t j = 0; j < wlst.size(); ++j) { +- chenc(wlst[j], dic_enc[d], io_enc); ++ wlst[j] = chenc(wlst[j], dic_enc[d], io_enc); + } + dialogexit = dialog(parser, pMS[d], token, filename, wlst, info); + }