/*
 * multilingual support for nvi
 * Copyright(c) 1996, 1997 by Jun-ichiro Itoh.  All rights reserved.
 * Author contact: <itojun@mt.cs.keio.ac.jp>
 * $Id: multi_iso2022.c,v 1.1.2.4 1999/08/10 23:21:48 itojun Exp $
 *
 * Freely redistributable, reusable, unless otherwise noted in accompanying
 * document. (for example, redistribution is prohibited during alpha-test
 * period)
 * Absolutely no warranty.
 *
 * The code is based on:
 *	jelvis japanization patch by Jun-ichiro Itoh
 *	nvi 1.03 japanization patch by Yoshitaka Tokugawa <toku@dit.co.jp>
 *
 * Would like to thank Jungshik Shin <jshin@pantheon.yale.edu> for letting
 * me know about iso-2022-kr twists.
 */

#include "config.h"

#ifdef MULTIBYTE

#include <sys/types.h>
#include <sys/queue.h>

#include <bitstring.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "../common/common.h"
#include "multibyte.h"

static int parsecharset __P((CHAR_T *, size_t, CHARSET const **, int *));
static int callcharset __P((CHARSET const *, ENCODING const *, CHAR_T **,
	ISO2022STATE *, int));
static void selcharset __P((CHARSET const *, ENCODING const *, CHAR_T **,
	CHAR_T *, ISO2022STATE *, int));

/*
 * Parse a charset designation escape sequence.
 *
 * p, len:	candidate bytes; p[0] must be ESC.
 * hcs:		if non-NULL, receives the matching charsettab entry, or NULL
 *		for ASCII and for unsupported charsets.
 * pgroup:	if non-NULL, receives the target group (0-3 for G0-G3).
 *		It may be written even when the parse fails later on.
 *
 * Recognized form:
 *	[ESC & <version> ESC] [$] [one of ( ) * + , - . /] <final>
 * preceded by ESC.  A single-byte set needs the group intermediate;
 * a multibyte set ('$') may omit it, which designates to G0.
 *
 * Returns the number of bytes consumed (0 < x) if it is a proper
 * designation of a supported charset.
 * If it detects an unsupported designation, it returns 0.
 * The caller must make a proper check for this case.
 * Otherwise returns -1.
 */
static int
parsecharset(p, len, hcs, pgroup)
	CHAR_T *p;
	size_t len;
	CHARSET const **hcs;
	int *pgroup;
{
	/* "()*+" select G0-G3 as 94-sets, ",-./" select G0-G3 as 96-sets. */
	static const char intermediates[] = "()*+,-./";
	const char *ip;
	size_t i;
	size_t idx;
	CSTYPE cstype;
	CHAR_T version;
	CHAR_T ch;
	int csok;
	int mb;
	int advance;

	if (! (1 <= len && p[0] == '\033'))
		return -1;

	/* Optional version prefix: ESC & <version> ESC. */
	i = 1;
	version = 0;
	if (4 <= len && p[i] == '&' && isecma(p[i + 1]) && p[i + 2] == '\033') {
		version = p[i + 1];
		i += 3;
	}

	csok = 0;
	mb = 0;
	cstype = CS94;
	for (/*nothing*/; i < len; i++) {
		if (p[i] == '$') {
			/*
			 * Multibyte marker.  It has to precede the group
			 * intermediate and must not be repeated.
			 */
			if (csok || mb)
				goto fail;
			if (pgroup)
				*pgroup = 0;
			cstype = CS94;
			mb = 1;
			continue;
		}

		/* strchr() also matches the terminator, so exclude NUL. */
		ip = (p[i] != '\0') ? strchr(intermediates, p[i]) : NULL;
		if (ip != NULL) {
			if (csok)
				goto fail;
			idx = (size_t)(ip - intermediates);
			if (pgroup)
				*pgroup = (int)(idx % 4);
			cstype = (idx < 4) ? CS94 : CS96;
			csok = 1;
			continue;
		}

		/* Anything else has to be the final byte. */
		if (!csok && !mb)
			goto fail;
		ch = p[i++];
		if (!isecma(ch))
			goto fail;
		advance = (int)i;
		goto found;
	}

fail:
	/*
	 * The string is not a proper charset designator.
	 */
	return -1;

found:
	/*
	 * The string is properly-formatted charset designator.
	 */
	if (mb) {
		if (cstype == CS94)
			cstype = CS9494;
		else
			cstype = CS9696;
	}

	/*
	 * ASCII (CS_NONE) is hardcoded here.
	 */
	if (cstype == CS94 && ch == 'B') {
		if (hcs)
			*hcs = NULL;
		return advance;
	}

	/* Look for a table entry with the same final, type and version. */
	for (i = 0; charsettab[i].name; i++) {
		if (! charsettab[i].enter)
			continue;
		if (charsettab[i].enter != ch)
			continue;
		if (charsettab[i].type != cstype)
			continue;
		if (charsettab[i].version != version)
			continue;

		if (hcs)
			*hcs = &charsettab[i];
		return advance;
	}

	/*
	 * Unsupported charset was properly designated.  We treat it as
	 * invalid designation.
	 */
	if (hcs)
		*hcs = NULL;
	return 0;
}

/*
 * Designate the specified charset to one of G0/1/2/3.
 * NOTE: it does not emit single shift/locking shift.
 *
 * cs:		charset to designate; NULL stands for ASCII.
 * e:		encoding description (usable groups, shifts, initial sets).
 * ht:		receives a pointer to the NUL-terminated escape sequence to
 *		be emitted.  It points to a static buffer which is
 *		overwritten by the next call.
 * state:	current designations; updated when a designation is made.
 * prefer:	preferred group (0-3), or -1 to let this function choose.
 *
 * Returns the group that holds the charset, or -1 on failure.
 */
static int
callcharset(cs, e, ht, state, prefer)
	CHARSET const *cs;
	ENCODING const *e;
	CHAR_T **ht;
	ISO2022STATE *state;
	int prefer;
{
	static CHAR_T sequence[30];
	CSTYPE type;
	int group;
	char finalchar;
	char versionchar;
	const char *d;
	char designation[4];
	size_t i;
	size_t j;
	int sanity;

	*ht = sequence;
	i = 0;
	sanity = 0;

	/*
	 * policy:
	 * - avoid using ESC , x when possible.
	 */

	/*
	 * If the encoding-supplied function works, use that.
	 */
	if (e->recommendation && prefer == -1)
		prefer = (*e->recommendation)(cs, e);

again:
	if (10 < sanity++)
		return -1;

	sequence[i] = '\0';

	/*
	 * 1.
	 * Do we already have the charset on G0/1/2/3, and there's no
	 * preferred location?  If so, we don't need to designate that again.
	 */
	for (j = 0; j < e->maxcharset; j++) {
		if (state->charsets[j] == cs && (prefer == -1 || prefer == j))
			return j;
	}

	/*
	 * 2.
	 * Invoke the target character set onto G0/1/2/3.
	 * The most tricky part is, how to choose G0/1/2/3.
	 */

	/*
	 * 2.1.
	 * Understand on which table (G0/1/2/3) we can map it.
	 * Note that we would like to avoid ESC , x (96 chars charset on G0)
	 * when possible.
	 *
	 * In the tables below, '\0' marks a group that cannot be used and
	 * '\001' marks a designation that is emitted without intermediate.
	 */
	if (cs == NULL) {
		type = CS94;
		finalchar = 'B';
		versionchar = '\0';
	} else {
		type = cs->type;
		finalchar = cs->enter;
		versionchar = cs->version;
	}

	switch (type) {
	case CS94:
		d = "()*+";
		break;
	case CS96:
		if (prefer != 0 && 1 < e->maxcharset)
			d = "\0-./";
		else
			d = ",-./";
		break;
	case CS9494:
	case CS9696:
		if (cs->oldstyle && !F_ISSET(e, E_NOOLDSTYLE))
			d = "\001)*+";
		else
			d = "()*+";
		break;
	default:
		/* Unknown charset type; we cannot build a designation. */
		return -1;
	}

	/*
	 * 2.2.
	 * Omit designations that are not available for us.
	 */
	memcpy(designation, d, 4);
	for (j = e->maxcharset; j < 4; j++)
		designation[j] = '\0';

#if 0	/* last resort */
	if (!(e->shifttype & S_0))
		designation[0] = '\0';
#endif
	if (1 == e->maxside && !(e->shifttype & S_1))
		designation[1] = '\0';
	if (!(e->shifttype & S_2))
		designation[2] = '\0';
	if (!(e->shifttype & S_3))
		designation[3] = '\0';

	/*
	 * 2.3.
	 * If the caller prefers some of G0/1/2/3, and it is impossible
	 * (or out of range), guess it automatically.
	 */
	if (prefer != -1
	 && (prefer < 0 || 4 <= prefer || designation[prefer] == '\0'))
		prefer = -1;

	/*
	 * 2.4.
	 * Make a guess.
	 */
	if (prefer == -1) {
		CHAR_T tcs;

		/*
		 * If the target charset is listed on the initial mapping,
		 * that location is preferred.
		 */
		for (j = 0; j < e->maxcharset && j < 4; j++) {
			if (designation[j] == '\0')
				continue;

			switch (j) {
			case 0:
				tcs = e->initg0;
				break;
			case 1:
				tcs = e->initg1;
				break;
			case 2:
				tcs = e->initg2;
				break;
			default:
				tcs = e->initg3;
				break;
			}
			if (ischarset(tcs) && (cs == &charset(tcs))) {
				prefer = j;
				goto guessed;
			} else if (tcs == CS_NONE && cs == NULL) {
				prefer = j;
				goto guessed;
			}
		}

		/*
		 * Last resort.  Use the slot available.
		 * designation[] has exactly 4 entries.
		 */
		for (j = 0; j < 4; j++) {
			if (designation[j]) {
				prefer = j;
				goto guessed;
			}
		}
	}

	/*
	 * seems to me that the encoding specification is invalid.
	 * bark.
	 */
	if (prefer == -1)
		return -1;

guessed:
	/*
	 * 3. emit the designation.
	 */
	group = prefer;
	if (state->charsets[group] == cs)
		goto again;

	/* Refuse before writing anything, so the buffer stays terminated. */
	if (designation[group] == '\0')
		return -1;

	if (versionchar) {
		sequence[i++] = '\033';
		sequence[i++] = '&';
		sequence[i++] = versionchar;
	}

	sequence[i++] = '\033';

	if (type == CS9494 || type == CS9696)
		sequence[i++] = '$';

	if (designation[group] != '\001')
		sequence[i++] = designation[group];

	sequence[i++] = finalchar;

	state->charsets[group] = cs;

	/* Go back to terminate the sequence and return via step 1. */
	goto again;
}

/*
 * emit iso2022 charset designation, including locking shifts.
 */
static void
selcharset(cs, e, ht, pmask, state, prefer)
	CHARSET const *cs;
	ENCODING const *e;
	CHAR_T **ht;
	CHAR_T *pmask;
	ISO2022STATE *state;
	int prefer;
{
#define GLcharset	(state->charsets[state->gl])
#define GRcharset	(state->charsets[state->gr])
	static CHAR_T sequence[30];
	size_t i;
	int group;
	int sanity;
	int shifttype;
	size_t j;
	int singleshift;

	*pmask = 0x00;
	sanity = 0;
	singleshift = 0;

	/*
	 * policy:
	 * - avoid locking shift when possible.
	 */

	/*
	 * 1.
	 * designte the charset to somewhere.
	 */
	group = callcharset(cs, e, ht, state, prefer);

	strcpy(sequence, *ht);
	*ht = sequence;
	i = strlen(sequence);
	sequence[i] = '\0';

again:
	/* XXX bad idea... */
	if (10 < sanity++)
		abort();

	/*
	 * 2.
	 * if the specified charset is on GL/GR plane, we are done.
	 */
	if (singleshift)
		return;
	if (group == state->gl && 1 <= e->maxside && GLcharset == cs)
		return;
	if (group == state->gr && 2 <= e->maxside && GRcharset == cs) {
		*pmask = 0x80;
		return;
	}

	/*
	 * 3.
	 * we already have the target charset on G0/1/2/3, and they are
	 * not on GL/GR plane.
	 * use locking shift or single shift.
	 */
	shifttype = e->shifttype;
	switch (group) {
	case 0: shifttype &= S_0; break;
	case 1: shifttype &= S_1; break;
	case 2: shifttype &= S_2; break;
	case 3: shifttype &= S_3; break;
	}

	if (! shifttype)
		abort();

	j = 1;
	while (!(j & shifttype))
		j = j << 1;

	switch (j) {
	case S_LS0:	/*0F*/
		sequence[i++] = SI;
		state->gl = 0;
		break;
	case S_LS1:	/*0E*/
		sequence[i++] = SO;
		state->gl = 1;
		break;
	case S_LS2:	/*ESC n*/
		sequence[i++] = '\033';
		sequence[i++] = 'n';
		state->gl = 2;
		break;
	case S_LS3:	/*ESC o*/
		sequence[i++] = '\033';
		sequence[i++] = 'o';
		state->gl = 3;
		break;
	case S_SS2:	/*ESC N*/
		sequence[i++] = '\033';
		sequence[i++] = 'N';
		singleshift = 1;
		*pmask = 0x00;
		break;
	case S_SS3:	/*ESC O*/
		sequence[i++] = '\033';
		sequence[i++] = 'O';
		singleshift = 1;
		*pmask = 0x00;
		break;
	case S_LS1R:	/*ESC ~*/
		sequence[i++] = '\033';
		sequence[i++] = '~';
		state->gr = 1;
		break;
	case S_LS2R:	/*ESC }*/
		sequence[i++] = '\033';
		sequence[i++] = '}';
		state->gr = 2;
		break;
	case S_LS3R:	/*ESC |*/
		sequence[i++] = '\033';
		sequence[i++] = '|';
		state->gr = 3;
		break;
	case S_SS2R:	/*8E*/
		sequence[i++] = 0x8e;
		singleshift = 1;
		*pmask = 0x80;
		break;
	case S_SS3R:	/*8F*/
		sequence[i++] = 0x8f;
		singleshift = 1;
		*pmask = 0x80;
		break;
	default:
		abort();
	}
	sequence[i] = '\0';

	goto again;
		
#undef GLcharset
#undef GRcharset
}


/*
 * Generic ISO-2022-* parser.  Supports 8bit input to some extent.
 *
 * Converts slen bytes at src from the external form into the internal
 * form.  A character of a designated charset becomes
 *	<CS_RAW0 + index into charsettab> <byte & 0x7f> [<byte & 0x7f>]
 * Designations and shifts are consumed and only update the state.
 * Bytes that cannot be interpreted are stored "raw": 7bit bytes as they
 * are, 8bit bytes as <CS_RAW0 + v_key_len(sp, byte)> <byte & 0x7f>.
 *
 * dst:		output buffer, or NULL to only compute the output length.
 * pdlen:	if non-NULL, receives the output length.
 * pflags:	if non-NULL, is initialized and gets MB_MULTIBYTE/MB_RAW set.
 * state:	designation/shift state, or NULL to use a temporary one
 *		initialized by multi_initstate().
 *
 * Always returns 0.
 */
int
iso2022_to_int(sp, e, dst, pdlen, src, slen, pflags, state)
	SCR *sp;
	ENCODING const *e;
	CHAR_T *dst;
	size_t *pdlen;
	CHAR_T *src;
	size_t slen;
	int *pflags;
	ISO2022STATE *state;
{
	CHAR_T *p;
	CHAR_T *q;
	size_t i;
	CHARSET const *cs;
	int group;
	int advance;

	ISO2022STATE t;
	int singlegl = 0;
	int singlegr = 0;
	/* A pending single shift overrides the locking shift for one char. */
#define	GLcharset	(state->charsets[(singlegl ? singlegl : state->gl)])
#define	GRcharset	(state->charsets[(singlegr ? singlegr : state->gr)])

	group = -1;
	advance = -1;

	/*
	 * XXX
	 * G0 and G1, GL and GR charset will be re-initialized at SOL.
	 */
	if (!state) {
		state = &t;	/*XXX*/
		multi_initstate(sp, e, state);
	} else {
		if (F_ISSET(e, E_SOLRESET))
			multi_initstate(sp, e, state);
	}

	q = dst;
	p = src;
	if (pflags)
		FL_INIT(*pflags, 0);

	/* Invariant: p == src + i at the top of every iteration. */
	for (i = 0; i < slen; i++) {
		/*
		 * designation
		 */
		if (p[0] == '\033'
		 && 0 <= (advance = parsecharset(p, slen - i, &cs, &group))) {
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			/* cs is NULL for ASCII and for unsupported sets. */
			state->charsets[group] = cs;
			if (advance) {
				i += (advance - 1);/*plus one more at for loop*/
				p += advance;
				continue;
			}

			/*
			 * Unsupported charset.
			 */
			goto raw;
		}

		/*
		 * locking shift
		 */
		if (p[0] == SI) {
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			state->gl = 0;
			p++;
			continue;
		}
		if (p[0] == SO) {
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			state->gl = 1;
			p++;
			continue;
		}
		/*
		 * The second byte is compared explicitly: strchr() would
		 * also match a NUL byte and yield a bogus group number.
		 */
		if (i + 1 < slen && p[0] == '\033'
		 && (p[1] == 'n' || p[1] == 'o')) {
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			state->gl = p[1] - 'n' + 2;
			p += 2;
			i++;
			continue;
		}
		if (i + 1 < slen && p[0] == '\033'
		 && (p[1] == '~' || p[1] == '}' || p[1] == '|')) {
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			state->gr = 3 - (p[1] - '|');
			p += 2;
			i++;
			continue;
		}

		/*
		 * single shift
		 * It is recognized only when at least one byte follows it,
		 * so that p[0] below is always inside the input.  A single
		 * shift at the very end of the input is handled as ordinary
		 * bytes.
		 */
		if (i + 2 < slen && p[0] == '\033'
		 && (p[1] == 'N' || p[1] == 'O')) {
			singlegl = p[1] - 'N' + 2;
			p += 2;
			i += 2;
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			/* fall through... */
		} else if (i + 1 < slen && (p[0] == 0x8e || p[0] == 0x8f)) {
			singlegl = p[0] - 0x8e + 2;
			singlegr = p[0] - 0x8e + 2;
			p++;
			i++;
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			/* fall through... */
		}

		/*
		 * ASCII mode chars.
		 */
		if ((p[0] & 0x80) == 0x00 && GLcharset == NULL)
			goto raw;
		if ((p[0] & 0x80) && GRcharset == NULL)
			goto raw;

		if (p[0] & 0x80) {
			/*
			 * character range check.
			 */
			switch (GRcharset->type) {
			case CS9696:
				if (i + 1 < slen) {
					if (0xa0 <= p[0] && p[0] <= 0xff
					 && 0xa0 <= p[1] && p[1] <= 0xff)
						break;
				}
				goto raw;
			case CS9494:
				if (i + 1 < slen) {
					if (0xa1 <= p[0] && p[0] <= 0xfe
					 && 0xa1 <= p[1] && p[1] <= 0xfe)
						break;
				}
				goto raw;
			case CS96:
				if (0xa0 <= p[0] && p[0] <= 0xff)
					break;
				goto raw;
			case CS94:
				if (0xa1 <= p[0] && p[0] <= 0xfe)
					break;
				goto raw;
			}

			if (GRcharset->type == CS9494
			 || GRcharset->type == CS9696) {
				if (dst) {
					*q++ = CS_RAW0 + (GRcharset - charsettab);
					*q++ = *p++ & 0x7f;
					*q++ = *p++ & 0x7f;
				} else {
					q += 3;
					p += 2;
				}
				i++;	/*plus one more at for loop*/
			} else {
				if (dst) {
					*q++ = CS_RAW0 + (GRcharset - charsettab);
					*q++ = *p++ & 0x7f;
				} else {
					q += 2;
					p++;
				}
			}
		} else {
			/*
			 * character range check.
			 */
			switch (GLcharset->type) {
			case CS9696:
				if (i + 1 < slen) {
					if (0x20 <= p[0] && p[0] <= 0x7f
					 && 0x20 <= p[1] && p[1] <= 0x7f)
						break;
				}
				goto raw;
			case CS9494:
				if (i + 1 < slen) {
					if (0x21 <= p[0] && p[0] <= 0x7e
					 && 0x21 <= p[1] && p[1] <= 0x7e)
						break;
				}
				goto raw;
			case CS96:
				if (0x20 <= p[0] && p[0] <= 0x7f)
					break;
				goto raw;
			case CS94:
				if (0x21 <= p[0] && p[0] <= 0x7e)
					break;
				goto raw;
			}

			if (GLcharset->type == CS9494
			 || GLcharset->type == CS9696) {
				if (dst) {
					*q++ = CS_RAW0 + (GLcharset - charsettab);
					*q++ = *p++ & 0x7f;
					*q++ = *p++ & 0x7f;
				} else {
					q += 3;
					p += 2;
				}
				i++;	/*plus one more at for loop*/
			} else {
				if (dst) {
					*q++ = CS_RAW0 + (GLcharset - charsettab);
					*q++ = *p++ & 0x7f;
				} else {
					q += 2;
					p++;
				}
			}
		}

		if (pflags)
			FL_SET(*pflags, MB_MULTIBYTE);
		singlegl = singlegr = 0;
		continue;

raw:
		/*
		 * The byte could not be interpreted in any charset.
		 */
		if (p[0] & 0x80) {
			if (dst) {
				*q++ = CS_RAW0 + v_key_len(sp, p[0]);
				*q++ = *p++ & 0x7f;
			} else {
				q += 2;
				p++;
			}
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE|MB_RAW);
			singlegl = singlegr = 0;
			continue;
		}

		if (dst)
			*q++ = *p++;
		else {
			q++;
			p++;
		}
		singlegl = singlegr = 0;
	}

	/* When dst is NULL, q has only been used as a counter. */
	if (pdlen)
		*pdlen = q - dst;
	return 0;
#undef GLcharset
#undef GRcharset
}

/*
 * Generic iso-2022-* generator.
 *
 * Converts slen bytes at src from the internal form into the external
 * form, inserting the designations and shifts computed by selcharset()
 * and callcharset().  At the end of the input GL is shifted back to G0,
 * the designations are reset if the encoding has E_EOLRESET, and ASCII
 * is selected again.
 *
 * dst:		output buffer, or NULL to only compute the output length.
 * pdlen:	if non-NULL, receives the output length.
 * pflags:	if non-NULL, is initialized and gets MB_MULTIBYTE/MB_RAW set.
 * state:	designation/shift state, or NULL to use a temporary one
 *		initialized by multi_initstate().
 *
 * Always returns 0.
 */
int
int_to_iso2022(sp, e, dst, pdlen, src, slen, pflags, state)
	SCR *sp;
	ENCODING const *e;
	CHAR_T *dst;
	size_t *pdlen;
	CHAR_T *src;
	size_t slen;
	int *pflags;
	ISO2022STATE *state;
{
	CHAR_T *p;
	CHAR_T *q;
	CHAR_T *t;
	size_t i;
	CHAR_T mask;
	CHARSET const *cs;

	ISO2022STATE tstate;
#define	GLcharset	state->charsets[state->gl]
#define	GRcharset	state->charsets[state->gr]

	/*
	 * XXX
	 * G0 and G1, GL and GR charset will be re-initialized at SOL.
	 */
	if (!state) {
		state = &tstate;	/*XXX*/
		multi_initstate(sp, e, state);
	} else {
		if (F_ISSET(e, E_SOLRESET))
			multi_initstate(sp, e, state);
	}

	q = dst;
	p = src;
	if (pflags)
		FL_INIT(*pflags, 0);
	for (i = 0; i < slen; i++) {
		/* 7bit bytes and raw 8bit pairs are handled below. */
		if ((p[0] & 0x80) == 0x00)
			goto raw;

		if (CS_RAW(p[0]))
			goto raw;

		if (p[0] & 0x80) {
			/*
			 * A character of some charset: p[0] identifies the
			 * charset and the following cs->blen - 1 bytes hold
			 * the character itself.
			 */
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE);
			cs = &charset(p[0]);
			selcharset(cs, e, &t, &mask, state, -1);
			while (t && *t) {
				if (dst)
					*q++ = *t++;
				else {
					t++;
					q++;
				}
			}
			if (dst) {
				size_t j;

				for (j = 1; j < cs->blen; j++)
					*q++ = (p[j] & 0x7f) | mask;
				p += cs->blen;
				i += (cs->blen - 1);
			} else {
				p += cs->blen;
				q += (cs->blen - 1);
				i += (cs->blen - 1);
			}
			continue;
		}

raw:
		/*
		 * Select ASCII: on G1 for a raw 8bit pair (only if the
		 * encoding has two sides), on G0 for a 7bit byte.
		 */
		if (CS_RAW(p[0])) {
			if (1 < e->maxside)
				selcharset(NULL, e, &t, &mask, state, 1);
			else
				t = NULL;
		} else
			selcharset(NULL, e, &t, &mask, state, 0);
		if (t && *t && pflags)
			FL_SET(*pflags, MB_MULTIBYTE);
		while (t && *t) {
			if (dst)
				*q++ = *t++;
			else {
				t++;
				q++;
			}
		}

		if (CS_RAW(p[0])) {
			/* raw pair: restore the 8th bit of the data byte. */
			if (dst) {
				*q++ = p[1] | 0x80;
				p += 2;
			} else {
				q++;
				p += 2;
			}
			i++;		/*plus one at for loop*/
			if (pflags)
				FL_SET(*pflags, MB_MULTIBYTE|MB_RAW);
			continue;
		}

		if (dst)
			*q++ = (*p++ & 0x7f) | mask;
		else {
			q++;
			p++;
		}
	}

	/* Shift GL back to G0 at the end of the line. */
	if (state->gl != 0) {
		if (pflags)
			FL_SET(*pflags, MB_MULTIBYTE);
		if (dst)
			*q++ = SI;
		else
			q++;	/* only counting; q must not be dereferenced */
		state->gl = 0;
	}
	if (e && F_ISSET(e, E_EOLRESET)) {
		/*
		 * Reset GR designation, if necessary.
		 *
		 * XXX will this do, for sure?
		 *
		 * NOTE(review): the sequence returned in t is not copied to
		 * the output here although state is updated - confirm that
		 * this is intended.
		 */
		if (2 <= e->maxcharset && 2 <= e->maxside) {
			if (ischarset(e->initg1))
				cs = &charset(e->initg1);
			else
				cs = NULL;
			selcharset(cs, e, &t, &mask, state, 1);
		}

		/*
		 * If something other than initial charsets is designated
		 * to G1/2/3, we reset them.
		 * G0 is omitted here since G0 will be designated later.
		 */
		for (i = 1; i < e->maxcharset; i++) {
			switch (i) {
			case 0:
				if (ischarset(e->initg0))
					cs = &charset(e->initg0);
				else
					cs = NULL;
				break;
			case 1:
				if (ischarset(e->initg1))
					cs = &charset(e->initg1);
				else
					cs = NULL;
				break;
			case 2:
				if (ischarset(e->initg2))
					cs = &charset(e->initg2);
				else
					cs = NULL;
				break;
			case 3:
				if (ischarset(e->initg3))
					cs = &charset(e->initg3);
				else
					cs = NULL;
				break;
			}

			if (state->charsets[i] != cs) {
				callcharset(cs, e, &t, state, i);
				while (t && *t) {
					if (dst)
						*q++ = *t++;
					else {
						t++;
						q++;
					}
				}
				if (pflags)
					FL_SET(*pflags, MB_MULTIBYTE);
			}
		}
	}

	/*
	 * should always return to ASCII on EOL
	 */
	selcharset(NULL, e, &t, &mask, state, 0);
	while (t && *t) {
		if (dst)
			*q++ = *t++;
		else {
			t++;
			q++;
		}
	}
	/*
	 * NOTE(review): MB_MULTIBYTE is set here even if nothing had to be
	 * emitted - confirm that callers expect this.
	 */
	if (pflags)
		FL_SET(*pflags, MB_MULTIBYTE);

	/* When dst is NULL, q has only been used as a counter. */
	if (pdlen)
		*pdlen = q - dst;
	return 0;
#undef GLcharset
#undef GRcharset
}

/*
 * iso-2022-kr flavour of iso2022_to_int().
 *
 * The input is converted by the generic parser first.  If the input
 * consists of nothing but the 4 byte KSC5601 designation (ESC $ ) C),
 * the output length is then forced to 0 and MB_MULTIBYTE|MB_JOIN is set
 * in *pflags.
 *
 * Returns whatever iso2022_to_int() returned.
 */
int
iso2022kr_to_int(sp, e, dst, pdlen, src, slen, pflags, state)
	SCR *sp;
	ENCODING const *e;
	CHAR_T *dst;
	size_t *pdlen;
	CHAR_T *src;
	size_t slen;
	int *pflags;
	ISO2022STATE *state;
{
	int rval;

	rval = iso2022_to_int(sp, e, dst, pdlen, src, slen, pflags, state);

	/* Anything but a bare designation line is left as converted. */
	if (slen != 4 || strncmp(src, "\033$)C", 4) != 0)
		return rval;

	if (pdlen != NULL)
		*pdlen = 0;
	if (pflags != NULL)
		FL_SET(*pflags, MB_MULTIBYTE|MB_JOIN);
	return rval;
}

/*
 * Special treatment for iso-2022-kr.
 * KSC5601 designation must appear at the beginning of the line.
 *
 * If state is given, G1 does not hold KSC5601 yet and the line contains
 * a KSC5601 character, the designation obtained from callcharset() is
 * placed in front of the output of int_to_iso2022().  In every other case
 * this is a plain call to int_to_iso2022().
 *
 * Arguments and return value are those of int_to_iso2022().
 */
int
int_to_iso2022kr(sp, e, dst, pdlen, src, slen, pflags, state)
	SCR *sp;
	ENCODING const *e;
	CHAR_T *dst;
	size_t *pdlen;
	CHAR_T *src;
	size_t slen;
	int *pflags;
	ISO2022STATE *state;
{
	size_t i;
	CHAR_T *p;
	CHAR_T *t;
	size_t offset;
	CHARSET const *cs;
	int needdesig;
	int ret;

	if (!state)
		goto standard;
	if (state->charsets[1] == &charset(CS_KSC5601))
		goto standard;

	/*
	 * Look for a KSC5601 character, stepping over the internal form
	 * one character at a time.
	 */
	t = NULL;
	cs = NULL;
	needdesig = 0;
	p = src;
	for (i = 0; i < slen; i++) {
		if (p[0] & 0x80) {
			cs = &charset(p[0]);
			if (p[0] == CS_KSC5601) {
				needdesig++;
				break;
			} else {
				p += cs->blen;
				i += (cs->blen - 1);	/*one more at loop*/
			}
		} else
			p++;
	}
	if (!needdesig)
		goto standard;

	/*
	 * we need a KSC5601 designation at the beginning of the line.
	 * cs points to the KSC5601 entry here.
	 *
	 * XXX
	 * very dirty hack.
	 */
	callcharset(cs, e, &t, state, -1);
	offset = 0;
	if (t) {
		/* compute the length of the designation. */
		while (t[offset])
			offset++;

		/*
		 * copy the designation into the buffer.
		 * An index is used so that no object is read and modified
		 * within the same expression.
		 */
		if (dst) {
			for (i = 0; i < offset; i++)
				dst[i] = t[i];
		}
	}

	ret = int_to_iso2022(sp, e, dst ? dst + offset : NULL, pdlen,
		src, slen, pflags, state);
	if (pdlen)
		*pdlen += offset;
	if (pflags)
		FL_SET(*pflags, MB_MULTIBYTE);

	return ret;

standard:
	return int_to_iso2022(sp, e, dst, pdlen, src, slen, pflags, state);
}


/*
 * iso-2022-jp-3 flavour of iso2022_to_int().
 *
 * After the generic conversion, every CS_JISX0208_1983 byte in the output
 * is replaced by CS_JISX0213_1.  The replacement is skipped when there is
 * no output buffer or no output length to work with.
 *
 * Returns whatever iso2022_to_int() returned.
 */
int
iso2022jp3_to_int(sp, e, dst, pdlen, src, slen, pflags, state)
	SCR *sp;
	ENCODING const *e;
	CHAR_T *dst;
	size_t *pdlen;
	CHAR_T *src;
	size_t slen;
	int *pflags;
	ISO2022STATE *state;
{
	size_t pos;
	int rval;

	rval = iso2022_to_int(sp, e, dst, pdlen, src, slen, pflags, state);

	if (dst != NULL && pdlen != NULL) {
		pos = 0;
		while (pos < *pdlen) {
			if (dst[pos] == CS_JISX0208_1983)
				dst[pos] = CS_JISX0213_1;
			pos++;
		}
	}
	return rval;
}


/*
 * Returns the recommended plane (G0/1/2/3) to map the specified charset
 * for iso-2022-cn: 1 for GB2312, ISO-IR-165 and CNS11643 plane 1,
 * 2 for CNS11643 plane 2, 3 for CNS11643 planes 3 to 7, and 0 for
 * everything else including ASCII (cs == NULL).
 */
int
iso2022cn_recommendation(cs, e)
	CHARSET const *cs;
	ENCODING const *e;
{
	CHAR_T id;
	int plane;

	plane = 0;
	if (cs != NULL) {
		/* internal charset id of the table entry */
		id = CS_RAW0 + (cs - charsettab);
		switch (id) {
		case CS_GB2312:
		case CS_ISO_IR165:
		case CS_CNS11643_1:
			plane = 1;
			break;
		case CS_CNS11643_2:
			plane = 2;
			break;
		case CS_CNS11643_3:
		case CS_CNS11643_4:
		case CS_CNS11643_5:
		case CS_CNS11643_6:
		case CS_CNS11643_7:
			plane = 3;
			break;
		}
	}
	return plane;
}

/*
 * Returns the recommended plane (G0/1/2/3) to map the specified charset
 * for iso-2022-jp: 0 for ASCII (cs == NULL) and for 94/94x94 charsets,
 * 2 for 96/96x96 charsets.  Returns -1 (no recommendation) if the charset
 * type is none of these.
 */
int
iso2022jp_recommendation(cs, e)
	CHARSET const *cs;
	ENCODING const *e;
{
	if (cs == NULL)
		return 0;

	switch (cs->type) {
	case CS94:
	case CS9494:
		return 0;
	case CS96:
	case CS9696:
		return 2;
	}

	/* Never fall off the end of a function that returns a value. */
	return -1;
}

/*
 * Returns the recommended plane (G0/1/2/3) to map the specified charset
 * for iso-2022-kr: 1 for KSC5601, 0 for everything else including ASCII
 * (cs == NULL).
 */
int
iso2022kr_recommendation(cs, e)
	CHARSET const *cs;
	ENCODING const *e;
{
	CHAR_T id;

	if (cs != NULL) {
		/* internal charset id of the table entry */
		id = CS_RAW0 + (cs - charsettab);
		if (id == CS_KSC5601)
			return 1;
	}
	return 0;
}

/*
 * Returns the recommended plane (G0/1/2/3) to map the specified charset
 * for ctext, or -1 if there is no recommendation.
 * ASCII (cs == NULL), CS_NONE and JIS X0201 left go to G0, JIS X0201
 * right and every 96/96x96 charset go to G1.
 */
int
ctext_recommendation(cs, e)
	CHARSET const *cs;
	ENCODING const *e;
{
	CHAR_T id;

	if (cs == NULL)
		return 0;

	/* exceptions, looked up by internal charset id */
	id = CS_RAW0 + (cs - charsettab);
	if (id == CS_NONE
	 || id == CS_JISX0201_LEFT)	/* XXX should it be listed? */
		return 0;
	if (id == CS_JISX0201_RIGHT)
		return 1;

	/* general rules */
	if (cs->type == CS96 || cs->type == CS9696)
		return 1;

	return -1;
}


/* XXX need heavy rework! */
void
iso2022_keyinput(sp, e, kbuf, kbuflen, intbuf, pintbuflen, pstate, pconsumed)
	SCR *sp;
	ENCODING const *e;
	CHAR_T *kbuf;
	size_t kbuflen;
	CHAR_T *intbuf;
	size_t *pintbuflen;
	int *pstate;
	size_t *pconsumed;
{
	size_t i;
	size_t j;
	size_t consumed;

	size_t sequencelen;
	int group;
	CHARSET const *cs;
	size_t advance;
	CHAR_T c;
	int shifttype;

	struct {
		u_int g0:7;
		u_int g1:7;
		u_int g2:7;
		u_int g3:7;
		u_int gl:2;
		u_int gr:2;
	} packedform;

	ISO2022STATE isostate;
	int singlegl;
	int singlegr;
#define GLcharset	(isostate.charsets[(singlegl ? singlegl : isostate.gl)])
#define GRcharset	(isostate.charsets[(singlegr ? singlegr : isostate.gr)])

	shifttype = e->shifttype;
#if 0
	/*
	 * if you think this is important to accept all available shift
	 * sequences, enable the following.
	 */
	shifttype = S_ALL;
#endif

	/*
	 * grab state information
	 */
    {
	/* sanity */
	if (sizeof(packedform) != sizeof(int))
		abort();

	if (*pstate == 0)
		multi_initstate(sp, e, &isostate);
	else {
		int *t;
		CHAR_T ch;
#define X(x)	((x) ? (0x80 | (x)) : (x))

		t = (int *)&packedform;

		*t = *pstate;
		ch = X(packedform.g0);
		isostate.charsets[0] = ischarset(ch) ? &charset(ch) : NULL;
		ch = X(packedform.g1);
		isostate.charsets[1] = ischarset(ch) ? &charset(ch) : NULL;
		ch = X(packedform.g2);
		isostate.charsets[2] = ischarset(ch) ? &charset(ch) : NULL;
		ch = X(packedform.g3);
		isostate.charsets[3] = ischarset(ch) ? &charset(ch) : NULL;
		isostate.gl = packedform.gl;
		isostate.gr = packedform.gr;
#undef X
	}
    }

	singlegl = singlegr = 0;
	i = j = consumed = 0;
	while (i < kbuflen) {
		/*
		 * designation
		 */
		if (kbuf[i] == '\033') {
			if (i + 1 == kbuflen) {
				i = kbuflen;
				continue;
			}

			/*
			 * 2 chars sequences - locking shift
			 */
			if (strchr("no", c = kbuf[i + 1])) {
				c = c - 'n' + 2;
				if ((c == 2 && (shifttype & S_LS2))
				 || (c == 3 && (shifttype & S_LS3))) {
					isostate.gl = c;
					i += 2;
					consumed += 2;
					continue;
				} else {
					/*
					 * LS[23] not supported in this charset
					 */
					goto raw;
				}
			}
			if (strchr("~}|", c = kbuf[i + 1])) {
				c = 3 - (c - '|');
				if ((c == 1 && (shifttype & S_LS1R))
				 || (c == 2 && (shifttype & S_LS2R))
				 || (c == 3 && (shifttype & S_LS3R))) {
					isostate.gr = c;
					i += 2;
					consumed += 2;
					continue;
				} else {
					/*
					 * LS[123]R not supported in this
					 * charset
					 */
					goto raw;
				}
			}

			if (i + 2 == kbuflen) {
				i = kbuflen;
				continue;
			}

			/*
			 * 3 chars sequences
			 */
			if (kbuf[i + 1] == '$' && isecma(kbuf[i + 2])) {
				sequencelen = 3;
				goto tryit;
			}
			if (strchr("()*+,-./", kbuf[i + 1])
			 && isecma(kbuf[i + 2])) {
				sequencelen = 3;
				goto tryit;
			}

			/*
			 * single shift - ESC N and ESC O
			 * Don't consume that till the actual char has come.
			 * In this sense, single shift can be regarded as 
			 * 3 chars sequence, or 4 chars sequence.
			 */
			if (strchr("NO", c = kbuf[i + 1])) {
				c = c - 'N' + 2;
				if ((c == 2 && (shifttype & S_SS2))
				 || (c == 3 && (shifttype & S_SS3))) {
					cs = isostate.charsets[c];
					switch (cs->type) {
					case CS94:
					case CS96:
						if (i + 3 < kbuflen) {
							i = kbuflen;
							continue;
						}
						break;
					case CS9494:
					case CS9696:
						if (i + 4 < kbuflen) {
							i = kbuflen;
							continue;
						}
						break;
					}
					singlegl = c;
					i += 2;
					consumed += 2;
					goto characters;
				} else {
					/*
					 * SS[23] not supported in this charset
					 */
					goto raw;
				}
			}

			if (i + 3 == kbuflen) {
				i = kbuflen;
				continue;
			}

			/*
			 * 4 chars sequence
			 */
			if (kbuf[i + 1] == '$' && strchr("()*+", kbuf[i + 2])
			 && isecma(kbuf[i + 3])) {
				sequencelen = 4;
tryit:
				if (0 <= (advance = parsecharset(&kbuf[i],
				    sequencelen, &cs, &group))) {
					isostate.charsets[group] = cs;
					if (advance) {
						/* supported charset */
						i += advance;
						consumed += advance;
						continue;
					} else {
						/* unsupported charset */
						goto raw;
					}
				}
			}

			if (i + 4 == kbuflen) {
				i = kbuflen;
				continue;
			}

			/*
			 * more-than-4 chars sequence
			 */
			if (i + 6 <= kbuflen
			 && memcmp(&kbuf[i + 1], "&@\033", 3) == 0
			 && strchr("()*+", kbuf[i + 4])
			 && isecma(kbuf[i + 5])) {
				sequencelen = 6;
				goto tryit;
			}
			if (i + 6 <= kbuflen
			 && memcmp(&kbuf[i + 1], "&@\033$", 4) == 0
			 && isecma(kbuf[i + 5])) {
				sequencelen = 6;
				goto tryit;
			}
			if (i + 7 <= kbuflen
			 && memcmp(&kbuf[i + 1], "&@\033$", 4) == 0
			 && strchr("()*+", kbuf[i + 5])
			 && isecma(kbuf[i + 6])) {
				sequencelen = 7;
				goto tryit;
			}
		}

		/*
		 * locking shift
		 */
		if (kbuf[i] == SI && (shifttype & S_SI)) {
			isostate.gl = 0;
			i++;
			consumed++;
			continue;
		}
		if (kbuf[i] == SO && (shifttype & S_SO)) {
			isostate.gl = 1;
			i++;
			consumed++;
			continue;
		}
		if ((kbuf[i] == 0x8e && (shifttype & S_SS3R))
		 || (kbuf[i] == 0x8f && (shifttype & S_SS3R))) {
			cs = isostate.charsets[kbuf[i] - 0x8e + 2];
			switch (cs->type) {
			case CS94:
			case CS96:
				if (i + 1 < kbuflen) {
					i = kbuflen;
					continue;
				}
				break;
			case CS9494:
			case CS9696:
				if (i + 2 < kbuflen) {
					i = kbuflen;
					continue;
				}
				break;
			}

			singlegl = singlegr = kbuf[i] - 0x8e + 2;
			i++;
			consumed++;
			/* fall through */
		}

characters:
		/*
		 * ASCII mode chars.
		 */
		if ((kbuf[i] & 0x80) == 0x00 && GLcharset == NULL)
			goto raw;
		if ((kbuf[i] & 0x80) && GRcharset == NULL)
			goto raw;
		
		/*
		 * multibyte cases
		 */
		if (kbuf[i] & 0x80) {
			/*
			 * character range check.
			 */
			switch (GRcharset->type) {
			case CS9696:
				if (i + 1 < kbuflen) {
					if (0xa0 <= kbuf[i] && kbuf[i] <= 0xff
					 && 0xa0 <= kbuf[i + 1]
					 && kbuf[i + 1] <= 0xff)
						break;
					else
						goto raw;
				} else {
					i = kbuflen;
					continue;
				}
			case CS9494:
				if (i + 1 < kbuflen) {
					if (0xa1 <= kbuf[i] && kbuf[i] <= 0xfe
					 && 0xa1 <= kbuf[i + 1]
					 && kbuf[i + 1] <= 0xfe)
						break;
					else
						goto raw;
				} else {
					i = kbuflen;
					continue;
				}
			case CS96:
				if (0xa0 <= kbuf[i] && kbuf[i] <= 0xff)
					break;
				goto raw;
			case CS94:
				if (0xa1 <= kbuf[i] && kbuf[i] <= 0xfe)
					break;
				goto raw;
			}

			if (GRcharset->type == CS9494
			 || GRcharset->type == CS9696) {
				intbuf[j++]
					= CS_RAW0 + (GRcharset - charsettab);
				intbuf[j++] = kbuf[i++] & 0x7f;
				intbuf[j++] = kbuf[i++] & 0x7f;
				consumed += 2;
			} else {
				intbuf[j++]
					= CS_RAW0 + (GRcharset - charsettab);
				intbuf[j++] = kbuf[i++] & 0x7f;
				consumed++;
			}
		} else {
			/*
			 * character range check.
			 */
			switch (GLcharset->type) {
			case CS9696:
				if (i + 1 < kbuflen) {
					if (0x20 <= kbuf[i] && kbuf[i] <= 0x7f
					 && 0x20 <= kbuf[i + 1]
					 && kbuf[i + 1] <= 0x7f)
						break;
					else
						goto raw;
				} else {
					i = kbuflen;
					continue;
				}
			case CS9494:
				if (i + 1 < kbuflen) {
					if (0x21 <= kbuf[i] && kbuf[i] <= 0x7e
					 && 0x21 <= kbuf[i + 1]
					 && kbuf[i + 1] <= 0x7e)
						break;
					else
						goto raw;
				} else {
					i = kbuflen;
					continue;
				}
			case CS96:
				if (0x20 <= kbuf[i] && kbuf[i] <= 0x7f)
					break;
				goto raw;
			case CS94:
				if (0x21 <= kbuf[i] && kbuf[i] <= 0x7e)
					break;
				goto raw;
			}

			if (GLcharset->type == CS9494
			 || GLcharset->type == CS9696) {
				intbuf[j++]
					= CS_RAW0 + (GLcharset - charsettab);
				intbuf[j++] = kbuf[i++] & 0x7f;
				intbuf[j++] = kbuf[i++] & 0x7f;
				consumed += 2;
			} else {
				intbuf[j++]
					= CS_RAW0 + (GLcharset - charsettab);
				intbuf[j++] = kbuf[i++] & 0x7f;
				consumed++;
			}
		}

		singlegl = singlegr = 0;
		continue;

raw:
		if (kbuf[i] & 0x80) {
			intbuf[j++] = CS_RAW0 + v_key_len(sp, kbuf[i]);
			intbuf[j++] = kbuf[i] & 0x7f;
			i++;
			singlegl = singlegr = 0;
			consumed++;
			continue;
		}

		/* initialize state information, if we got control chars. */
		if (strchr("\r\n", kbuf[i])) {
			if (e && F_ISSET(e, E_SOLRESET))
				multi_initstate(sp, e, &isostate);
		}
		
		intbuf[j++] = kbuf[i++] & 0x7f;
		singlegl = singlegr = 0;
		consumed++;
	}

	/*
	 * pack state information
	 */
    {
	int *t;

	if (isostate.charsets[0])
		packedform.g0 = isostate.charsets[0] - charsettab;
	else
		packedform.g0 = 0x00;
	if (isostate.charsets[1])
		packedform.g1 = isostate.charsets[1] - charsettab;
	else
		packedform.g1 = 0x00;
	if (isostate.charsets[2])
		packedform.g2 = isostate.charsets[2] - charsettab;
	else
		packedform.g2 = 0x00;
	if (isostate.charsets[3])
		packedform.g3 = isostate.charsets[3] - charsettab;
	else
		packedform.g3 = 0x00;
	packedform.gl = isostate.gl;
	packedform.gr = isostate.gr;

	t = (int *)&packedform;

	*pstate = *t;
    }
	*pintbuflen = j;
	*pconsumed = consumed;
#undef GLcharset
#undef GRcharset
}
#endif /*MULTIBYTE*/
