#include "tag.h"

/*
 * Run "/bin/tcs -f cs" as a child process with both its standard input
 * and its standard output attached to one end of a pipe, and return the
 * other end.  The caller writes text in character set cs to the returned
 * descriptor and reads tcs' output back from the same descriptor, which
 * relies on pipes being bidirectional (as they are on Plan 9).
 *
 * Failures of pipe, fork and exec are reported through sysfatal, which is
 * relied on not to return.
 */
static int
tcs(char *cs)
{
	int pfd[2];

	if(pipe(pfd) < 0)
		sysfatal("tcs: pipe: %r");
	switch(fork()) {
	case -1:
		sysfatal("tcs: fork: %r");
	case 0:
		dup(pfd[0], 0);
		dup(pfd[0], 1);
		close(pfd[0]);
		close(pfd[1]);
		execl("/bin/tcs", "tcs", "-f", cs, nil);
		sysfatal("tcs: exec: %r");
	}
	close(pfd[0]);
	return pfd[1];
}

/*
 * Return the number of bytes in [p, e) that precede the first separator.
 * The text is scanned in units of width bytes and sep points at one
 * width-byte separator.  Only whole characters are examined, so nothing
 * at or beyond e is ever read; if no separator is found the result is the
 * length of the whole characters available, leaving any trailing partial
 * character uncounted.
 */
static int
stringlen(uchar *p, uchar *e, int width, char *sep)
{
	uchar *i;

	for(i = p; e - i >= width; i += width)
		if(memcmp(i, sep, width) == 0)
			break;
	return i - p;
}

/*
 * Return Eutf16le if [p, e) starts with the bytes ff fe, Eutf16be if it
 * starts with fe ff, and -1 if it starts with neither or holds fewer than
 * two bytes.
 */
static int
bomenc(uchar *p, uchar *e)
{
	if(e - p < 2)
		return -1;
	if(p[0] == 0xff && p[1] == 0xfe)
		return Eutf16le;
	if(p[0] == 0xfe && p[1] == 0xff)
		return Eutf16be;
	return -1;
}

/*
 * Split the text in [*p, e), stored in encoding enc, into separate
 * NUL-terminated strings and return them as a freshly allocated,
 * nil-terminated array.  Text that is not already Eutf is piped through
 * tcs.  *p is advanced past everything consumed and never beyond e.
 *
 * Returns nil with errstr set on failure.  On failure the strings
 * collected so far are freed and the tcs child, if any, is closed and
 * reaped.  (Assumes emalloc returns memory that free accepts -- confirm
 * against its definition.)
 */
static char **
gstrings(uchar **p, uchar *e, int enc)
{
	int fd;
	char *ss[MaxStrings+1], **s, **strings, *buf;
	char err[ERRMAX];
	int len, m, b, cap;

	/*
	 * Utf16 endianness must stupidly be specified in both the
	 * encoding field and in the byte order mark of every following
	 * string field.  The spec begs implementations to screw up.
	 * Before starting tcs, check for a mismatch between the
	 * encoding field and the first text field, assume the byte
	 * order mark is correct, and hope the rest of the text fields
	 * have the same byte order.
	 */
	if(enc == Eutf16le || enc == Eutf16be) {
		/* nothing to compare against if the first text field lacks a bom */
		b = bomenc(*p, e);
		if(b >= 0 && b != enc) {
			fprint(2, "%s: enc-bom mismatch; using bom\n", argv0);
			enc = b;
		}
	}

	/* tcs can't be started without an encstr */
	if(enc >= Eend) {
		werrstr("discarding: unknown encoding: %d", enc);
		return nil;
	}

	fd = -1;	/* stays negative when no tcs child is running */
	if(enc != Eutf) {
		if((fd = tcs(enctab[enc].str)) < 0) {
			werrstr("tcs: %r");
			return nil;
		}
	}

	for(s = ss; *p < e; s++) {
		/* ss has MaxStrings+1 slots; the last is reserved for the nil terminator */
		if(s - ss >= MaxStrings) {
			werrstr("too many strings; limit is %d", MaxStrings);
			goto fail;
		}

		switch(enc) {
		case E8859:
		case Eutf:
			break;
		case Eutf16le:
		case Eutf16be:
			b = bomenc(*p, e);
			if(b < 0)	/* no bom */
				break;
			/* restarting tcs to switch endianness at this point would be too much work */
			if(b != enc) {
				werrstr("enc-bom mismatch");
				goto fail;
			}
			/* skip over the bom. tcs would otherwise turn it into gibberish utf8 */
			*p += 2;
			break;
		default:
			if(enc >= Eend)	/* checked above */
				abort();
			werrstr("discarding: unimplemented encoding: %s", enctab[enc].str);
			goto fail;
		}

		len = stringlen(*p, e, enctab[enc].len, enctab[enc].sep);
		if(enc == Eutf || len == 0) {	/* a zero-length field would close tcs' pipe */
			buf = emalloc(len + 1);
			memcpy(buf, *p, len);
			buf[len] = '\0';
		} else {
			/*
			 * Conversion to UTF-8 can expand the text: one 8859-1 byte
			 * becomes at most two bytes, two UTF-16 bytes at most three.
			 * Twice the input length therefore always suffices.  This
			 * still assumes tcs answers each write with a single chunk
			 * that one read can collect.
			 */
			cap = 2 * len;
			buf = emalloc(cap + 1);
			if(write(fd, *p, len) < len) {
				werrstr("write tcs: %r");
				free(buf);
				goto fail;
			}
			if((m = read(fd, buf, cap)) < 0) {
				werrstr("read tcs: %r");
				free(buf);
				goto fail;
			}
			buf[m] = '\0';
		}
		*s = buf;

		*p += len;
		if(e - *p >= enctab[enc].len)
			*p += enctab[enc].len;	/* skip the separator */
		else
			*p = e;	/* at most a partial character is left; drop it */
	}
	*s = nil;

	len = sizeof(char *) * (s-ss + 1);
	strings = emalloc(len);
	memcpy(strings, ss, len);

	if(fd >= 0) {
		close(fd);
		waitpid();
	}

	return strings;

fail:
	/* keep the caller's error message intact across the cleanup calls */
	rerrstr(err, sizeof err);
	while(s > ss)
		free(*--s);
	if(fd >= 0) {
		close(fd);
		waitpid();
	}
	werrstr("%s", err);
	return nil;
}

/*
 * Copy n bytes from *p into s, NUL-terminate s and advance *p by n.
 * s must have room for n+1 bytes.
 */
void
gstr(uchar **p, char *s, int n)
{
	memcpy(s, *p, n);
	s[n] = '\0';
	*p += n;
}

/* Copy n bytes from s to *p (no terminator is written) and advance *p by n. */
void
pstr(uchar **p, char *s, int n)
{
	memcpy(*p, s, n);
	*p += n;
}

/* Read one byte from *p and advance *p. */
u8int
g8(uchar **p)
{
	return *(*p)++;
}

/* Write one byte to *p and advance *p. */
void
p8(uchar **p, u8int n)
{
	*(*p)++ = n;
}

/* Read a 16-bit value, most significant byte first, and advance *p by 2. */
u16int
g16(uchar **p)
{
	u16int x;

	x = *(*p)++ << 8;
	x |= *(*p)++;
	return x;
}

/* Write a 16-bit value, most significant byte first, and advance *p by 2. */
void
p16(uchar **p, u16int n)
{
	*(*p)++ = n >> 8;
	*(*p)++ = n;
}

/* Read a 24-bit value, most significant byte first, and advance *p by 3. */
u32int
g24(uchar **p)
{
	u32int x;

	x = *(*p)++ << 16;
	x |= *(*p)++ << 8;
	x |= *(*p)++;
	return x;
}

/* Read a 32-bit value, most significant byte first, and advance *p by 4. */
u32int
g32(uchar **p)
{
	u32int x;

	/* cast before shifting: a promoted int would overflow for bytes >= 0x80 */
	x = (u32int)*(*p)++ << 24;
	x |= *(*p)++ << 16;
	x |= *(*p)++ << 8;
	x |= *(*p)++;
	return x;
}

/*
 * Read four bytes, most significant first, each contributing 7 bits
 * (shifts of 21, 14, 7 and 0), and advance *p by 4.  The bytes are not
 * masked, so a byte with its top bit set spills into the next group.
 */
u32int
gss32(uchar **p)
{
	u32int x;

	x = *(*p)++ << 21;
	x |= *(*p)++ << 14;
	x |= *(*p)++ << 7;
	x |= *(*p)++;
	return x;
}

/*
 * Write the low 28 bits of n as four bytes of 7 bits each, most
 * significant group first, and advance *p by 4.
 */
void
pss32(uchar **p, u32int n)
{
	*(*p)++ = (n >> 21) & 0x7f;
	*(*p)++ = (n >> 14) & 0x7f;
	*(*p)++ = (n >> 7) & 0x7f;
	*(*p)++ = n & 0x7f;
}

/*
 * Unpack a Header from *p: MagicSize bytes of magic, then version,
 * revision and flags (one byte each), then a 7-bits-per-byte length.
 */
void
gheader(uchar **p, Header *h)
{
	gstr(p, h->magic, MagicSize);
	h->version = g8(p);
	h->revision = g8(p);
	h->flags = g8(p);
	h->length = gss32(p);
}

/* Pack a Header to *p in the same layout that gheader reads. */
void
pheader(uchar **p, Header *h)
{
	pstr(p, h->magic, MagicSize);
	p8(p, h->version);
	p8(p, h->revision);
	p8(p, h->flags);
	pss32(p, h->length);
}

/*
 * Unpack a record header from *p; the layout depends on h->version:
 *	2: 3-byte id, 24-bit length, no flags (flags set to 0)
 *	3: IdSize-byte id, plain 32-bit length, 16-bit flags
 *	4: IdSize-byte id, 7-bits-per-byte length, 16-bit flags
 * Any other version aborts.
 */
void
grechdr(uchar **p, Rechdr *rh, Header *h)
{
	switch(h->version) {
	case 2:
		gstr(p, rh->id, 3);
		rh->length = g24(p);
		rh->flags = 0;
		break;
	case 3:
		gstr(p, rh->id, IdSize);
		rh->length = g32(p);
		rh->flags = g16(p);
		break;
	case 4:
		gstr(p, rh->id, IdSize);
		rh->length = gss32(p);
		rh->flags = g16(p);
		break;
	default:
		abort();
	}
}

/*
 * Pack a record header to *p: IdSize-byte id, 7-bits-per-byte length and
 * 16-bit flags, i.e. the layout grechdr reads for version 4.
 */
void
prechdr(uchar **p, Rechdr *rh)
{
	pstr(p, rh->id, IdSize);
	pss32(p, rh->length);
	p16(p, rh->flags);
}

/*
 * Pack a whole record to *p: its header, followed by the body written by
 * the packer that recpacker selects for r->type.
 */
void
precord(uchar **p, Record *r)
{
	prechdr(p, r);
	recpacker(r->type)(p, r);
}

/*
 * Unpack a comment of n bytes from *p: an encoding byte, a 3-byte
 * language code, then a description string and a comment string.
 * Returns 0 on success, with c->description and c->comment pointing at
 * allocated strings.  Returns -1 with errstr set if the text cannot be
 * decoded or either string is missing.
 */
int
gcomment(uchar **p, Comment *c, int n)
{
	uchar *q;
	int enc;
	char **strings, **s;

	q = *p + n;
	enc = g8(p);
	gstr(p, c->language, 3);
	if((strings = gstrings(p, q, enc)) == nil)
		return -1; /* return errstr unmodified */
	if(strings[0] == nil || strings[1] == nil) {
		/* nothing is handed to the caller, so release what gstrings allocated */
		free(strings[0]);
		free(strings);
		werrstr("discarding: empty");
		return -1;
	}
	c->description = strings[0];
	c->comment = strings[1];
	/* only the first two strings are kept; drop any extras and the array itself */
	for(s = strings + 2; *s != nil; s++)
		free(*s);
	free(strings);
	return 0;
}

/*
 * Pack a comment record body to *p: encoding byte Eutf, LangSize bytes of
 * language, the description, a NUL separator, then the comment with no
 * terminator.
 */
void
pcomment(uchar **p, Record *r)
{
	p8(p, Eutf);
	pstr(p, r->c.language, LangSize);
	pstr(p, r->c.description, strlen(r->c.description));
	*(*p)++ = '\0';
	pstr(p, r->c.comment, strlen(r->c.comment));
}

/*
 * Unpack a text field of n bytes from *p: an encoding byte followed by
 * one or more strings, stored in t->strings as a nil-terminated array.
 * Returns 0 on success, or -1 with errstr set if the text cannot be
 * decoded or contains no strings.
 */
int
gtext(uchar **p, Text *t, int n)
{
	uchar *q;
	int enc;

	q = *p + n;
	enc = g8(p);
	if((t->strings = gstrings(p, q, enc)) == nil)
		return -1; /* return errstr unmodified */
	if(t->strings[0] == nil) {
		werrstr("discarding: empty");
		return -1;
	}
	return 0;
}

/*
 * Pack a text record body to *p: encoding byte Eutf, then every string in
 * r->t.strings, with a NUL between consecutive strings and none after the
 * last.
 */
void
ptext(uchar **p, Record *r)
{
	char **s;

	p8(p, Eutf);
	for(s = r->t.strings; *s != nil; s++) {
		pstr(p, *s, strlen(*s));
		if(*(s+1) != nil)
			*(*p)++ = '\0';
	}
}