#include "tag.h"

/*
 * Start /bin/tcs converting from character set cs and return a
 * file descriptor connected to it.  The child gets pfd[0] as both
 * its standard input and standard output; the caller writes input
 * to, and reads converted output from, the returned pfd[1].
 * Any failure to set this up is fatal.
 */
static int
tcs(char *cs)
{
	int pfd[2], pid;

	if(pipe(pfd) < 0)
		sysfatal("tcs: pipe: %r");

	pid = fork();
	if(pid == -1)
		sysfatal("tcs: fork: %r");

	if(pid == 0) {
		/* child: talk to the parent over pfd[0] only */
		dup(pfd[0], 0);
		dup(pfd[0], 1);
		close(pfd[0]);
		close(pfd[1]);
		execl("/bin/tcs", "tcs", "-f", cs, nil);
		/* only reached if the exec failed */
		sysfatal("tcs: exec: %r");
	}

	/* parent: keep our end, drop the child's */
	close(pfd[0]);
	return pfd[1];
}

/*
 * Return the length in bytes, not counting the terminator, of the
 * string that starts at p and is bounded by e.  The string is
 * scanned in units of byte÷char bytes and ends at the first unit
 * equal to sep, or at the last whole unit before e if there is no
 * terminator.  The result never exceeds e - p; a trailing partial
 * unit (fewer than byte÷char bytes) is not counted.
 */
static int
stringlen(uchar *p, uchar *e, int byte÷char, char *sep)
{
	uchar *i;

	/*
	 * Only look at whole units.  Comparing when fewer than
	 * byte÷char bytes remain would read past e, and stepping
	 * over them would report a length that runs beyond e.
	 */
	for(i = p; e - i >= byte÷char; i += byte÷char)
		if(memcmp(i, sep, byte÷char) == 0)
			break;
	return i - p;
}

/*
 * Report which byte order mark, if any, begins the field at p:
 * Eutf16le for ff fe, Eutf16be for fe ff, or -1 when there is no
 * bom or fewer than two bytes remain before e.
 */
static int
bomenc(uchar *p, uchar *e)
{
	if(e - p < 2)
		return -1;
	if(p[0] == 0xff && p[1] == 0xfe)
		return Eutf16le;
	if(p[0] == 0xfe && p[1] == 0xff)
		return Eutf16be;
	return -1;
}

/*
 * Split the bytes from *p up to e into separator-terminated
 * strings in encoding enc, convert each to utf8 (through tcs
 * unless enc is already Eutf) and return them as a freshly
 * allocated, nil-terminated array of freshly allocated strings.
 * *p is advanced past what was consumed.
 *
 * On error nil is returned with errstr set; everything allocated
 * so far is released and the tcs process, if any, is reaped.
 * At most MaxStrings strings are accepted.
 */
static char **
gstrings(uchar **p, uchar *e, int enc)
{
	int fd;
	char *ss[MaxStrings+1], **s, **strings;
	char err[ERRMAX];
	int bom, len, outlen, m;

	/*
	 * Utf16 endianness must stupidly be specified in both the
	 * encoding field and in the byte order mark of every following
	 * string field. The spec begs implementations to screw up.
	 * Before starting tcs, check for a mismatch between the
	 * encoding field and the first text field, assume the byte
	 * order mark is correct, and hope the rest of the text fields
	 * have the same byte order.
	 */
	switch(enc) {
	case Eutf16le:
	case Eutf16be:
		/* give up if the first text field lacks a bom */
		bom = bomenc(*p, e);
		if(bom < 0)
			break;

		if(bom != enc) {
			fprint(2, "%s: enc-bom mismatch; using bom\n", argv0);
			enc = bom;
		}
	}

	/* tcs can't be started without an encstr */
	if(enc < 0 || enc >= Eend) {
		werrstr("discarding: unknown encoding: %d", enc);
		return nil;
	}

	fd = -1;	/* no tcs running */
	if(enc != Eutf) {
		if((fd = tcs(enctab[enc].str)) < 0) {
			werrstr("tcs: %r");
			return nil;
		}
	}

	for(s=ss; *p < e; s++) {
		/* ss holds MaxStrings strings plus the terminating nil */
		if(s - ss >= MaxStrings) {
			werrstr("too many strings; limit is %d", MaxStrings);
			goto fail;
		}

		switch(enc) {
		case E8859:
		case Eutf:
			break;
		case Eutf16le:
		case Eutf16be:
			/* check if there's no bom */
			bom = bomenc(*p, e);
			if(bom < 0)
				break;

			/* restarting tcs to switch endianness at this point would be too much work */
			if(bom != enc) {
				werrstr("enc-bom mismatch");
				goto fail;
			}

			/* skip over the bom. tcs would otherwise turn it into gibberish utf8 */
			*p += 2;
			break;
		default:
			if(enc >= Eend)	/* checked above */
				abort();
			werrstr("discarding: unimplemented encoding: %s", enctab[enc].str);
			goto fail;
		}

		len = stringlen(*p, e, enctab[enc].len, enctab[enc].sep);
		if(len > e - *p)	/* never touch bytes beyond the field */
			len = e - *p;

		if(enc == Eutf || len == 0) {	/* a zero-length field would close tcs' pipe */
			*s = emalloc(len + 1);
			memcpy(*s, *p, len);
			(*s)[len] = '\0';
		} else {
			/*
			 * The utf8 output can be longer than the input: a
			 * latin1 byte may become two bytes and a two-byte
			 * utf16 unit may become three, so twice the input
			 * length is enough for the encodings handled above.
			 * NOTE(review): a single read is still assumed to
			 * return the whole conversion -- confirm against
			 * tcs' buffering.
			 */
			outlen = 2*len;
			*s = emalloc(outlen + 1);
			if(write(fd, *p, len) < len) {
				werrstr("write tcs: %r");
				free(*s);
				goto fail;
			}
			if((m = read(fd, *s, outlen)) < 0) {
				werrstr("read tcs: %r");
				free(*s);
				goto fail;
			}
			(*s)[m] = '\0';
		}

		*p += len;
		if(*p < e) {
			/* skip the separator without stepping past e */
			if(e - *p < enctab[enc].len)
				*p = e;
			else
				*p += enctab[enc].len;
		}
	}
	*s = nil;

	len = sizeof(char *) * (s-ss + 1);
	strings = emalloc(len);
	memcpy(strings, ss, len);

	if(fd >= 0) {
		close(fd);
		waitpid();
	}

	return strings;

fail:
	/*
	 * Preserve the error set at the point of failure; the
	 * cleanup calls below may overwrite errstr.
	 */
	rerrstr(err, sizeof err);
	/* assumes emalloc memory can be released with free -- TODO confirm */
	while(s > ss)
		free(*--s);
	if(fd >= 0) {
		close(fd);
		waitpid();
	}
	werrstr("%s", err);
	return nil;
}
	
/*
 * Copy n bytes from *p into s, NUL-terminate s, and advance *p by n.
 * s must have room for n+1 bytes.
 */
void
gstr(uchar **p, char *s, int n)
{
	uchar *src;

	src = *p;
	memcpy(s, src, n);
	s[n] = '\0';
	*p = src + n;
}

/*
 * Copy n bytes of s to *p and advance *p by n.
 * No terminator is written.
 */
void
pstr(uchar **p, char *s, int n)
{
	uchar *dst;

	dst = *p;
	memcpy(dst, s, n);
	*p = dst + n;
}

/* Fetch one byte from *p and advance *p past it. */
u8int
g8(uchar **p)
{
	u8int b;

	b = **p;
	*p += 1;
	return b;
}

/* Store the byte n at *p and advance *p past it. */
void
p8(uchar **p, u8int n)
{
	**p = n;
	*p += 1;
}

/* Fetch a big-endian 16-bit value from *p and advance *p by two. */
u16int
g16(uchar **p)
{
	uchar *b;

	b = *p;
	*p = b + 2;
	return b[0] << 8 | b[1];
}

/* Store n at *p as two bytes, high byte first, and advance *p by two. */
void
p16(uchar **p, u16int n)
{
	uchar *b;

	b = *p;
	b[0] = n >> 8;
	b[1] = n;
	*p = b + 2;
}

/* Fetch a big-endian 24-bit value from *p and advance *p by three. */
u32int
g24(uchar **p)
{
	uchar *b;

	b = *p;
	*p = b + 3;
	return b[0] << 16 | b[1] << 8 | b[2];
}

/*
 * Fetch a big-endian 32-bit value from *p and advance *p by four.
 */
u32int
g32(uchar **p)
{
	u32int x;

	/*
	 * Widen each byte before shifting: a uchar promotes to a
	 * signed int, and shifting a value >= 0x80 left by 24 would
	 * overflow it.
	 */
	x  = (u32int)*(*p)++ << 24;
	x |= (u32int)*(*p)++ << 16;
	x |= (u32int)*(*p)++ << 8;
	x |= (u32int)*(*p)++;
	return x;
}

/*
 * Fetch a four-byte value stored seven bits per byte, most
 * significant group first, and advance *p by four.  The bytes are
 * not masked, so a set high bit is or'ed into the next group up.
 */
u32int
gss32(uchar **p)
{
	uchar *b;

	b = *p;
	*p = b + 4;
	return b[0] << 21 | b[1] << 14 | b[2] << 7 | b[3];
}

/*
 * Store the low 28 bits of n at *p as four bytes of seven bits
 * each, most significant group first, and advance *p by four.
 */
void
pss32(uchar **p, u32int n)
{
	int shift;

	for(shift = 21; shift >= 0; shift -= 7)
		*(*p)++ = (n >> shift) & 0x7f;
}

/*
 * Unpack a tag header from *p into h, advancing *p past it.
 * The fields are read in wire order: MagicSize bytes of magic
 * (NUL-terminated by gstr, so h->magic needs MagicSize+1 bytes),
 * one byte each of version, revision and flags, then the length
 * as a four-byte, seven-bits-per-byte value.
 */
void
gheader(uchar **p, Header *h)
{
	gstr(p, h->magic, MagicSize);
	h->version = g8(p);
	h->revision = g8(p);
	h->flags = g8(p);
	h->length = gss32(p);
}

/*
 * Pack the tag header h at *p, advancing *p past it.  The fields
 * are written in the same order gheader reads them: MagicSize
 * bytes of magic, one byte each of version, revision and flags,
 * then the length as four bytes of seven bits each.
 */
void
pheader(uchar **p, Header *h)
{
	pstr(p, h->magic, MagicSize);
	p8(p, h->version);
	p8(p, h->revision);
	p8(p, h->flags);
	pss32(p, h->length);
}

/*
 * Unpack a record header from *p into rh, advancing *p past it.
 * The layout depends on the tag version in h:
 *	2: 3-byte id, 24-bit length, no flags (flags set to 0)
 *	3: IdSize-byte id, 32-bit length, 16-bit flags
 *	4: IdSize-byte id, seven-bits-per-byte length, 16-bit flags
 * Any other version aborts.
 */
void
grechdr(uchar **p, Rechdr *rh, Header *h)
{
	if(h->version == 2) {
		gstr(p, rh->id, 3);
		rh->length = g24(p);
		rh->flags = 0;
		return;
	}

	if(h->version != 3 && h->version != 4)
		abort();

	/* versions 3 and 4 differ only in how the length is encoded */
	gstr(p, rh->id, IdSize);
	if(h->version == 3)
		rh->length = g32(p);
	else
		rh->length = gss32(p);
	rh->flags = g16(p);
}

/*
 * Pack the record header rh at *p, advancing *p past it:
 * IdSize bytes of id, the length as four bytes of seven bits
 * each, then 16 bits of flags.  This matches the layout grechdr
 * reads for version 4.
 */
void
prechdr(uchar **p, Rechdr *rh)
{
	pstr(p, rh->id, IdSize);
	pss32(p, rh->length);
	p16(p, rh->flags);
}

/*
 * Pack the record r at *p: first its header, then its body using
 * the packer that recpacker returns for r->type.
 */
void
precord(uchar **p, Record *r)
{
	/* NOTE(review): passes a Record* where prechdr takes a Rechdr*; presumably Record embeds a Rechdr -- confirm in tag.h */
	prechdr(p, r);
	recpacker(r->type)(p, r);
}

/*
 * Unpack an n-byte comment record body from *p into c: one byte
 * of text encoding, a 3-byte language code, then a description
 * and the comment text as two separator-terminated strings.
 *
 * Returns 0 on success, with c->description and c->comment
 * pointing at newly allocated strings.  Returns -1 with errstr
 * set if the strings cannot be decoded or either is missing.
 */
int
gcomment(uchar **p, Comment *c, int n)
{
	uchar *q;
	int enc, i;
	char **strings;

	/* the encoding byte and language code must fit in the record */
	if(n < 1 + 3) {
		werrstr("discarding: empty");
		return -1;
	}

	q = *p + n;
	enc = g8(p);
	gstr(p, c->language, 3);

	if((strings = gstrings(p, q, enc)) == nil)
		return -1;	/* return errstr unmodified */
	if(strings[0] == nil || strings[1] == nil) {
		/* assumes gstrings' allocations can be released with free -- TODO confirm */
		free(strings[0]);
		free(strings);
		werrstr("discarding: empty");
		return -1;
	}

	c->description = strings[0];
	c->comment = strings[1];

	/* only the first two strings are kept; release the rest and the array */
	for(i = 2; strings[i] != nil; i++)
		free(strings[i]);
	free(strings);

	return 0;
}

/*
 * Pack the comment record r at *p, advancing *p: the Eutf
 * encoding byte, LangSize bytes of language, the description
 * followed by a NUL separator, then the comment text with no
 * terminator.
 */
void
pcomment(uchar **p, Record *r)
{
	char *desc, *text;

	desc = r->c.description;
	text = r->c.comment;

	p8(p, Eutf);
	pstr(p, r->c.language, LangSize);
	pstr(p, desc, strlen(desc));
	/* separator between description and text */
	**p = '\0';
	++*p;
	pstr(p, text, strlen(text));
}

/*
 * Unpack an n-byte text record body from *p into t: one byte of
 * text encoding followed by one or more separator-terminated
 * strings.
 *
 * Returns 0 on success, with t->strings set to a newly allocated
 * nil-terminated array.  Returns -1 with errstr set, and
 * t->strings set to nil, if the strings cannot be decoded or
 * there are none.
 */
int
gtext(uchar **p, Text *t, int n)
{
	uchar *q;
	int enc;

	/* the encoding byte must fit in the record */
	if(n < 1) {
		t->strings = nil;
		werrstr("discarding: empty");
		return -1;
	}

	q = *p + n;
	enc = g8(p);

	if((t->strings = gstrings(p, q, enc)) == nil)
		return -1;	/* return errstr unmodified */
	if(t->strings[0] == nil) {
		/*
		 * don't leave the caller holding an array with nothing in it.
		 * assumes gstrings' allocation can be released with free -- TODO confirm
		 */
		free(t->strings);
		t->strings = nil;
		werrstr("discarding: empty");
		return -1;
	}

	return 0;
}

/*
 * Pack the text record r at *p, advancing *p: the Eutf encoding
 * byte, then every string in the nil-terminated r->t.strings,
 * with a NUL between consecutive strings and none after the last.
 */
void
ptext(uchar **p, Record *r)
{
	char **s;

	p8(p, Eutf);
	s = r->t.strings;
	while(*s != nil) {
		pstr(p, *s, strlen(*s));
		s++;
		if(*s != nil) {
			/* more to come: write the separator */
			**p = '\0';
			++*p;
		}
	}
}