00001
00002 #include "define.h"
00003
/* Conversion state shared by the routines in this file. */

/* Set to 1 by pst_unicode_init() and back to 0 by pst_unicode_close(). */
static int unicode_up = 0;
/* utf-16le -> utf-8 descriptor; starts out as (iconv_t)-1 like the two
 * target descriptors below, so "not opened" has a single representation. */
static iconv_t i16to8 = (iconv_t)-1;
/* Heap copy of the charset name the target descriptors were last opened for. */
static const char *target_charset = NULL;
/* Nonzero while i8totarget holds an open descriptor. */
static int target_open_from = 0;
/* Nonzero while target2i8 holds an open descriptor. */
static int target_open_to = 0;
/* utf-8 -> target charset descriptor. */
static iconv_t i8totarget = (iconv_t)-1;
/* target charset -> utf-8 descriptor. */
static iconv_t target2i8 = (iconv_t)-1;
00011
00012
/*
 * ASSERT(cond, message...): pass the message arguments to DIE() when cond is
 * false. The do { } while (0) wrapper makes the expansion a single statement,
 * so "if (c) ASSERT(...); else ..." parses as written.
 */
#define ASSERT(x,...) do { if( !(x) ) DIE(( __VA_ARGS__)); } while (0)
00014
00015
00018 static void pst_vbresize(pst_vbuf *vb, size_t len);
00019 static void pst_vbresize(pst_vbuf *vb, size_t len)
00020 {
00021 vb->dlen = 0;
00022
00023 if (vb->blen >= len) {
00024 vb->b = vb->buf;
00025 return;
00026 }
00027
00028 vb->buf = realloc(vb->buf, len);
00029 vb->b = vb->buf;
00030 vb->blen = len;
00031 }
00032
00033
00034 static size_t pst_vbavail(pst_vbuf * vb);
00035 static size_t pst_vbavail(pst_vbuf * vb)
00036 {
00037 return vb->blen - vb->dlen - (size_t)(vb->b - vb->buf);
00038 }
00039
00040
00041 static void open_targets(const char* charset);
00042 static void open_targets(const char* charset)
00043 {
00044 if (!target_charset || strcasecmp(target_charset, charset)) {
00045 if (target_open_from) iconv_close(i8totarget);
00046 if (target_open_to) iconv_close(target2i8);
00047 if (target_charset) free((char *)target_charset);
00048 target_charset = strdup(charset);
00049 target_open_from = 1;
00050 target_open_to = 1;
00051 i8totarget = iconv_open(target_charset, "utf-8");
00052 if (i8totarget == (iconv_t)-1) {
00053 target_open_from = 0;
00054 DEBUG_WARN(("Couldn't open iconv descriptor for utf-8 to %s.\n", target_charset));
00055 }
00056 target2i8 = iconv_open("utf-8", target_charset);
00057 if (target2i8 == (iconv_t)-1) {
00058 target_open_to = 0;
00059 DEBUG_WARN(("Couldn't open iconv descriptor for %s to utf-8.\n", target_charset));
00060 }
00061 }
00062 }
00063
00064
00065 static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion);
00066 static size_t sbcs_conversion(pst_vbuf *dest, const char *inbuf, int iblen, iconv_t conversion)
00067 {
00068 size_t inbytesleft = iblen;
00069 size_t icresult = (size_t)-1;
00070 size_t outbytesleft = 0;
00071 char *outbuf = NULL;
00072 int myerrno;
00073
00074 pst_vbresize(dest, 2*iblen);
00075
00076 do {
00077 outbytesleft = dest->blen - dest->dlen;
00078 outbuf = dest->b + dest->dlen;
00079 icresult = iconv(conversion, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
00080 myerrno = errno;
00081 dest->dlen = outbuf - dest->b;
00082 if (inbytesleft) pst_vbgrow(dest, 2*inbytesleft);
00083 } while ((size_t)-1 == icresult && E2BIG == myerrno);
00084
00085 if (icresult == (size_t)-1) {
00086 DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
00087 pst_unicode_init();
00088 return (size_t)-1;
00089 }
00090 return (icresult) ? (size_t)-1 : 0;
00091 }
00092
00093
00094 static void pst_unicode_close();
00095 static void pst_unicode_close()
00096 {
00097 iconv_close(i16to8);
00098 if (target_open_from) iconv_close(i8totarget);
00099 if (target_open_to) iconv_close(target2i8);
00100 if (target_charset) free((char *)target_charset);
00101 target_charset = NULL;
00102 target_open_from = 0;
00103 target_open_to = 0;
00104 unicode_up = 0;
00105 }
00106
00107
/**
 * Check whether a buffer of 16-bit code units contains an all-zero unit.
 *
 * Units are examined at even byte offsets. Only complete two-byte units
 * inside the first length bytes are read, so an odd length no longer causes
 * a read of str[length].
 *
 * @param str     bytes to scan
 * @param length  number of bytes available in str
 * @return 1 when a zero unit was found, 0 otherwise (a warning is logged)
 */
static int utf16_is_terminated(const char *str, int length);
static int utf16_is_terminated(const char *str, int length)
{
    int i;

    /* "length - i >= 2" keeps both str[i] and str[i + 1] in bounds and
     * cannot overflow for any int length. */
    for (i = 0; length - i >= 2; i += 2) {
        if (str[i] == 0 && str[i + 1] == 0)
            return 1;
    }

    DEBUG_WARN(("utf16 string is not zero terminated\n"));
    return 0;
}
00125
00126
00127 pst_vbuf *pst_vballoc(size_t len)
00128 {
00129 pst_vbuf *result = pst_malloc(sizeof(pst_vbuf));
00130 if (result) {
00131 result->dlen = 0;
00132 result->blen = 0;
00133 result->buf = NULL;
00134 pst_vbresize(result, len);
00135 }
00136 else DIE(("malloc() failure"));
00137 return result;
00138 }
00139
00140
00143 void pst_vbgrow(pst_vbuf *vb, size_t len)
00144 {
00145 if (0 == len)
00146 return;
00147
00148 if (0 == vb->blen) {
00149 pst_vbresize(vb, len);
00150 return;
00151 }
00152
00153 if (vb->dlen + len > vb->blen) {
00154 if (vb->dlen + len < vb->blen * 1.5)
00155 len = vb->blen * 1.5;
00156 char *nb = pst_malloc(vb->blen + len);
00157 if (!nb) DIE(("malloc() failure"));
00158 vb->blen = vb->blen + len;
00159 memcpy(nb, vb->b, vb->dlen);
00160
00161 free(vb->buf);
00162 vb->buf = nb;
00163 vb->b = vb->buf;
00164 } else {
00165 if (vb->b != vb->buf)
00166 memcpy(vb->buf, vb->b, vb->dlen);
00167 }
00168
00169 vb->b = vb->buf;
00170
00171 ASSERT(pst_vbavail(vb) >= len, "vbgrow(): I have failed in my mission.");
00172 }
00173
00174
00177 void pst_vbset(pst_vbuf * vb, void *b, size_t len)
00178 {
00179 pst_vbresize(vb, len);
00180 memcpy(vb->b, b, len);
00181 vb->dlen = len;
00182 }
00183
00184
00187 void pst_vbappend(pst_vbuf *vb, void *b, size_t len)
00188 {
00189 if (0 == vb->dlen) {
00190 pst_vbset(vb, b, len);
00191 return;
00192 }
00193 pst_vbgrow(vb, len);
00194 memcpy(vb->b + vb->dlen, b, len);
00195 vb->dlen += len;
00196 }
00197
00198
00199 void pst_unicode_init()
00200 {
00201 if (unicode_up) pst_unicode_close();
00202 i16to8 = iconv_open("utf-8", "utf-16le");
00203 if (i16to8 == (iconv_t)-1) {
00204 DEBUG_WARN(("Couldn't open iconv descriptor for utf-16le to utf-8.\n"));
00205 }
00206 unicode_up = 1;
00207 }
00208
00209
00210 size_t pst_vb_utf16to8(pst_vbuf *dest, const char *inbuf, int iblen)
00211 {
00212 size_t inbytesleft = iblen;
00213 size_t icresult = (size_t)-1;
00214 size_t outbytesleft = 0;
00215 char *outbuf = NULL;
00216 int myerrno;
00217
00218 if (!unicode_up) return (size_t)-1;
00219 pst_vbresize(dest, iblen);
00220
00221
00222 if (!utf16_is_terminated(inbuf, iblen))
00223 return (size_t)-1;
00224
00225 do {
00226 outbytesleft = dest->blen - dest->dlen;
00227 outbuf = dest->b + dest->dlen;
00228 icresult = iconv(i16to8, (ICONV_CONST char**)&inbuf, &inbytesleft, &outbuf, &outbytesleft);
00229 myerrno = errno;
00230 dest->dlen = outbuf - dest->b;
00231 if (inbytesleft) pst_vbgrow(dest, inbytesleft);
00232 } while ((size_t)-1 == icresult && E2BIG == myerrno);
00233
00234 if (icresult == (size_t)-1) {
00235 DEBUG_WARN(("iconv failure: %s\n", strerror(myerrno)));
00236 pst_unicode_init();
00237 return (size_t)-1;
00238 }
00239 return (icresult) ? (size_t)-1 : 0;
00240 }
00241
00242
00243 size_t pst_vb_utf8to8bit(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset)
00244 {
00245 open_targets(charset);
00246 if (!target_open_from) return (size_t)-1;
00247 return sbcs_conversion(dest, inbuf, iblen, i8totarget);
00248 }
00249
00250
00251 size_t pst_vb_8bit2utf8(pst_vbuf *dest, const char *inbuf, int iblen, const char* charset)
00252 {
00253 open_targets(charset);
00254 if (!target_open_to) return (size_t)-1;
00255 return sbcs_conversion(dest, inbuf, iblen, target2i8);
00256 }
00257