ref: 24741b77d6c03549d7992d4dba003f145e46f109
parent: f9625c0942200c16a4ab48e4505b792cc75f9cc7
author: qwx <qwx@sciops.net>
date: Sat Aug 19 05:14:59 EDT 2023
file: MOD and opus files
--- /dev/null
+++ b/sys/src/cmd/file.c
@@ -1,0 +1,1771 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include <mach.h>
+
+/*
+ * file - determine type of file
+ */
+#define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
+
+uchar buf[6001];
+short cfreq[140];
+short wfreq[50];
+int nbuf;
+Dir* mbuf;
+int fd;
+char *fname;
+char *slash;
+
+enum
+{
+ Cword,
+ Fword,
+ Aword,
+ Alword,
+ Lword,
+ I1,
+ I2,
+ I3,
+ Clatin = 128,
+ Cbinary,
+ Cnull,
+ Ceascii,
+ Cutf,
+};
+struct
+{
+ char* word;
+ int class;
+} dict[] =
+{
+ "PATH", Lword,
+ "TEXT", Aword,
+ "adt", Alword,
+ "aggr", Alword,
+ "alef", Alword,
+ "array", Lword,
+ "block", Fword,
+ "char", Cword,
+ "common", Fword,
+ "con", Lword,
+ "data", Fword,
+ "dimension", Fword,
+ "double", Cword,
+ "extern", Cword,
+ "bio", I2,
+ "float", Cword,
+ "fn", Lword,
+ "function", Fword,
+ "h", I3,
+ "implement", Lword,
+ "import", Lword,
+ "include", I1,
+ "int", Cword,
+ "integer", Fword,
+ "iota", Lword,
+ "libc", I2,
+ "long", Cword,
+ "module", Lword,
+ "real", Fword,
+ "ref", Lword,
+ "register", Cword,
+ "self", Lword,
+ "short", Cword,
+ "static", Cword,
+ "stdio", I2,
+ "struct", Cword,
+ "subroutine", Fword,
+ "u", I2,
+ "void", Cword,
+};
+
+/* codes for 'mode' field in language structure */
+enum {
+ Normal = 0,
+ First, /* first entry for language spanning several ranges */
+ Multi, /* later entries " " " ... */
+ Shared, /* codes used in several languages */
+ };
+
+struct
+{
+ int mode; /* see enum above */
+ int count;
+ int low;
+ int high;
+ char *name;
+
+} language[] =
+{
+ Normal, 0, 0x0100, 0x01FF, "Extended Latin",
+ Normal, 0, 0x0370, 0x03FF, "Greek",
+ Normal, 0, 0x0400, 0x04FF, "Cyrillic",
+ Normal, 0, 0x0530, 0x058F, "Armenian",
+ Normal, 0, 0x0590, 0x05FF, "Hebrew",
+ Normal, 0, 0x0600, 0x06FF, "Arabic",
+ Normal, 0, 0x0900, 0x097F, "Devanagari",
+ Normal, 0, 0x0980, 0x09FF, "Bengali",
+ Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi",
+ Normal, 0, 0x0A80, 0x0AFF, "Gujarati",
+ Normal, 0, 0x0B00, 0x0B7F, "Oriya",
+ Normal, 0, 0x0B80, 0x0BFF, "Tamil",
+ Normal, 0, 0x0C00, 0x0C7F, "Telugu",
+ Normal, 0, 0x0C80, 0x0CFF, "Kannada",
+ Normal, 0, 0x0D00, 0x0D7F, "Malayalam",
+ Normal, 0, 0x0E00, 0x0E7F, "Thai",
+ Normal, 0, 0x0E80, 0x0EFF, "Lao",
+ Normal, 0, 0x1000, 0x105F, "Tibetan",
+ Normal, 0, 0x10A0, 0x10FF, "Georgian",
+ Normal, 0, 0x3040, 0x30FF, "Japanese",
+ Normal, 0, 0x3100, 0x312F, "Chinese",
+ First, 0, 0x3130, 0x318F, "Korean",
+ Multi, 0, 0x3400, 0x3D2F, "Korean",
+ Shared, 0, 0x4e00, 0x9fff, "CJK",
+ Normal, 0, 0, 0, 0, /* terminal entry */
+};
+
+
+enum
+{
+ Fascii, /* printable ascii */
+ Flatin, /* latin 1*/
+ Futf, /* UTF character set */
+ Fbinary, /* binary */
+ Feascii, /* ASCII with control chars */
+ Fnull, /* NULL in file */
+} guess;
+
+void bump_utf_count(Rune);
+int cistrncmp(char*, char*, int);
+void filetype(int);
+int getfontnum(uchar*, uchar**);
+int isas(void);
+int isc(void);
+int iscint(void);
+int isenglish(void);
+int ishp(void);
+int ishtml(void);
+int isrfc822(void);
+int ismbox(void);
+int islimbo(void);
+int istga(void);
+int ismp3(void);
+int ismp4(void);
+int isoggvorbis(void);
+int isoggopus(void);
+int ismung(void);
+int isp9bit(void);
+int isp9font(void);
+int isrtf(void);
+int ismsdos(void);
+int isicocur(void);
+int iself(void);
+int istring(void);
+int isoffstr(void);
+int iff(void);
+int long0(void);
+int longoff(void);
+int istar(void);
+int isface(void);
+int isexec(void);
+int isudiff(void);
+int isexecscript(void);
+int p9bitnum(char*, int*);
+int p9subfont(uchar*);
+void print_utf(void);
+void type(char*, int);
+int utf_count(void);
+void wordfreq(void);
+
+int (*call[])(void) =
+{
+ long0, /* recognizable by first 4 bytes */
+ istring, /* recognizable by first string */
+ iself, /* ELF (foreign) executable */
+ isexec, /* native executables */
+ isexecscript, /* executable scripts */
+ iff, /* interchange file format (strings) */
+ longoff, /* recognizable by 4 bytes at some offset */
+ isoffstr, /* recognizable by string at some offset */
+ isudiff, /* unified diff output */
+ isrfc822, /* email file */
+ ismbox, /* mail box */
+ istar, /* recognizable by tar checksum */
+ iscint, /* compiler/assembler intermediate */
+ ishtml, /* html keywords */
+ islimbo, /* limbo source */
+ isc, /* c & alef compiler key words */
+ isas, /* assembler key words */
+ isp9font, /* plan 9 font */
+ isp9bit, /* plan 9 image (as from /dev/window) */
+ isrtf, /* rich text format */
+ ismsdos, /* msdos exe (virus file attachement) */
+ isicocur, /* windows icon or cursor file */
+ isface, /* ascii face file */
+ istga,
+ isoggvorbis,
+ isoggopus,
+ ismp4,
+ ismp3,
+
+ /* last resorts */
+ ismung, /* entropy compressed/encrypted */
+ isenglish, /* char frequency English */
+ 0
+};
+
+int mime;
+
+char OCTET[] = "application/octet-stream";
+char PLAIN[] = "text/plain";
+
+void
+main(int argc, char *argv[])
+{
+ int i, j, maxlen;
+ char *cp;
+ Rune r;
+
+ ARGBEGIN{
+ case 'm':
+ mime = 1;
+ break;
+ default:
+ fprint(2, "usage: file [-m] [file...]\n");
+ exits("usage");
+ }ARGEND;
+
+ maxlen = 0;
+ if(mime == 0 || argc > 1){
+ for(i = 0; i < argc; i++) {
+ for (j = 0, cp = argv[i]; *cp; j++, cp += chartorune(&r, cp))
+ ;
+ if(j > maxlen)
+ maxlen = j;
+ }
+ }
+ if (argc <= 0) {
+ if(!mime)
+ print ("stdin: ");
+ filetype(0);
+ }
+ else {
+ for(i = 0; i < argc; i++)
+ type(argv[i], maxlen);
+ }
+ exits(0);
+}
+
+void
+type(char *file, int nlen)
+{
+ Rune r;
+ int i;
+ char *p;
+
+ if(nlen > 0){
+ slash = 0;
+ for (i = 0, p = file; *p; i++) {
+ if (*p == '/') /* find rightmost slash */
+ slash = p;
+ p += chartorune(&r, p); /* count runes */
+ }
+ print("%s:%*s",file, nlen-i+1, "");
+ }
+ fname = file;
+ if ((fd = open(file, OREAD)) < 0) {
+ fprint(2, "cannot open: %r\n");
+ return;
+ }
+ filetype(fd);
+ close(fd);
+}
+
+void
+utfconv(void)
+{
+ Rune r;
+ uchar *rb;
+ char *p, *e;
+ int i;
+
+ if(nbuf < 4)
+ return;
+
+ if(memcmp(buf, "\x00\x00\xFE\xFF", 4) == 0){
+ if(!mime)
+ print("utf-32be ");
+ return;
+ } else
+ if(memcmp(buf, "\xFE\xFF\x00\x00", 4) == 0){
+ if(!mime)
+ print("utf-32le ");
+ return;
+ } else
+ if(memcmp(buf, "\xEF\xBB\xBF", 3) == 0){
+ memmove(buf, buf+3, nbuf-3);
+ nbuf -= 3;
+ return;
+ } else
+ if(memcmp(buf, "\xFE\xFF", 2) == 0){
+ if(!mime)
+ print("utf-16be ");
+
+ nbuf -= 2;
+ rb = malloc(nbuf+1);
+ memmove(rb, buf+2, nbuf);
+ p = (char*)buf;
+ e = p+sizeof(buf)-UTFmax-1;
+ for(i=0; i<nbuf && p < e; i+=2){
+ r = rb[i+1] | rb[i]<<8;
+ p += runetochar(p, &r);
+ }
+ *p = 0;
+ free(rb);
+ nbuf = p - (char*)buf;
+ } else
+ if(memcmp(buf, "\xFF\xFE", 2) == 0){
+ if(!mime)
+ print("utf-16le ");
+
+ nbuf -= 2;
+ rb = malloc(nbuf+1);
+ memmove(rb, buf+2, nbuf);
+ p = (char*)buf;
+ e = p+sizeof(buf)-UTFmax-1;
+ for(i=0; i<nbuf && p < e; i+=2){
+ r = rb[i] | rb[i+1]<<8;
+ p += runetochar(p, &r);
+ }
+ *p = 0;
+ free(rb);
+ nbuf = p - (char*)buf;
+ }
+}
+
+void
+filetype(int fd)
+{
+ Rune r;
+ int i, f, n;
+ char *p, *eob;
+
+ free(mbuf);
+ mbuf = dirfstat(fd);
+ if(mbuf == nil){
+ fprint(2, "cannot stat: %r\n");
+ return;
+ }
+ if(mbuf->mode & DMDIR) {
+ print("%s\n", mime ? OCTET : "directory");
+ return;
+ }
+ if(mbuf->type != 'M' && mbuf->type != '|') {
+ if(mime)
+ print("%s\n", OCTET);
+ else
+ print("special file #%C/%s\n", mbuf->type, mbuf->name);
+ return;
+ }
+ /* may be reading a pipe on standard input */
+ nbuf = readn(fd, buf, sizeof(buf)-1);
+ if(nbuf < 0) {
+ fprint(2, "cannot read: %r\n");
+ return;
+ }
+ if(nbuf == 0) {
+ print("%s\n", mime ? PLAIN : "empty file");
+ return;
+ }
+ buf[nbuf] = 0;
+
+ utfconv();
+
+ /*
+ * build histogram table
+ */
+ memset(cfreq, 0, sizeof(cfreq));
+ for (i = 0; language[i].name; i++)
+ language[i].count = 0;
+ eob = (char *)buf+nbuf;
+ for(n = 0, p = (char *)buf; p < eob; n++) {
+ if (!fullrune(p, eob-p) && eob-p < UTFmax)
+ break;
+ p += chartorune(&r, p);
+ if (r == 0)
+ f = Cnull;
+ else if (r <= 0x7f) {
+ if (!isprint(r) && !isspace(r))
+ f = Ceascii; /* ASCII control char */
+ else f = r;
+ } else if (r == 0x80) {
+ bump_utf_count(r);
+ f = Cutf;
+ } else if (r < 0xA0)
+ f = Cbinary; /* Invalid Runes */
+ else if (r <= 0xff)
+ f = Clatin; /* Latin 1 */
+ else {
+ bump_utf_count(r);
+ f = Cutf; /* UTF extension */
+ }
+ cfreq[f]++; /* ASCII chars peg directly */
+ }
+ /*
+ * gross classify
+ */
+ if (cfreq[Cbinary])
+ guess = Fbinary;
+ else if (cfreq[Cutf])
+ guess = Futf;
+ else if (cfreq[Clatin])
+ guess = Flatin;
+ else if (cfreq[Ceascii])
+ guess = Feascii;
+ else if (cfreq[Cnull])
+ guess = Fbinary;
+ else
+ guess = Fascii;
+ /*
+ * lookup dictionary words
+ */
+ memset(wfreq, 0, sizeof(wfreq));
+ if(guess == Fascii || guess == Flatin || guess == Futf)
+ wordfreq();
+ /*
+ * call individual classify routines
+ */
+ for(i=0; call[i]; i++)
+ if((*call[i])())
+ return;
+
+ /*
+ * if all else fails,
+ * print out gross classification
+ */
+ if (nbuf < 100 && !mime)
+ print(mime ? PLAIN : "short ");
+ if (guess == Fascii)
+ print("%s\n", mime ? PLAIN : "Ascii");
+ else if (guess == Feascii)
+ print("%s\n", mime ? PLAIN : "extended ascii");
+ else if (guess == Flatin)
+ print("%s\n", mime ? PLAIN : "latin ascii");
+ else if (guess == Futf && utf_count() < 4)
+ print_utf();
+ else print("%s\n", mime ? OCTET : "binary");
+}
+
+void
+bump_utf_count(Rune r)
+{
+ int low, high, mid;
+
+ high = sizeof(language)/sizeof(language[0])-1;
+ for (low = 0; low < high;) {
+ mid = (low+high)/2;
+ if (r >= language[mid].low) {
+ if (r <= language[mid].high) {
+ language[mid].count++;
+ break;
+ } else low = mid+1;
+ } else high = mid;
+ }
+}
+
+int
+utf_count(void)
+{
+ int i, count;
+
+ count = 0;
+ for (i = 0; language[i].name; i++)
+ if (language[i].count > 0)
+ switch (language[i].mode) {
+ case Normal:
+ case First:
+ count++;
+ break;
+ default:
+ break;
+ }
+ return count;
+}
+
+int
+chkascii(void)
+{
+ int i;
+
+ for (i = 'a'; i < 'z'; i++)
+ if (cfreq[i])
+ return 1;
+ for (i = 'A'; i < 'Z'; i++)
+ if (cfreq[i])
+ return 1;
+ return 0;
+}
+
+int
+find_first(char *name)
+{
+ int i;
+
+ for (i = 0; language[i].name != 0; i++)
+ if (language[i].mode == First
+ && strcmp(language[i].name, name) == 0)
+ return i;
+ return -1;
+}
+
+void
+print_utf(void)
+{
+ int i, printed, j;
+
+ if(mime){
+ print("%s\n", PLAIN);
+ return;
+ }
+ if (chkascii()) {
+ printed = 1;
+ print("Ascii");
+ } else
+ printed = 0;
+ for (i = 0; language[i].name; i++)
+ if (language[i].count) {
+ switch(language[i].mode) {
+ case Multi:
+ j = find_first(language[i].name);
+ if (j < 0)
+ break;
+ if (language[j].count > 0)
+ break;
+ /* Fall through */
+ case Normal:
+ case First:
+ if (printed)
+ print(" & ");
+ else printed = 1;
+ print("%s", language[i].name);
+ break;
+ case Shared:
+ default:
+ break;
+ }
+ }
+ if(!printed)
+ print("UTF");
+ print(" text\n");
+}
+
+void
+wordfreq(void)
+{
+ int low, high, mid, r;
+ uchar *p, *p2, c;
+
+ p = buf;
+ for(;;) {
+ while (p < buf+nbuf && !isalpha(*p))
+ p++;
+ if (p >= buf+nbuf)
+ return;
+ p2 = p;
+ while(p < buf+nbuf && isalpha(*p))
+ p++;
+ c = *p;
+ *p = 0;
+ high = sizeof(dict)/sizeof(dict[0]);
+ for(low = 0;low < high;) {
+ mid = (low+high)/2;
+ r = strcmp(dict[mid].word, (char*)p2);
+ if(r == 0) {
+ wfreq[dict[mid].class]++;
+ break;
+ }
+ if(r < 0)
+ low = mid+1;
+ else
+ high = mid;
+ }
+ *p++ = c;
+ }
+}
+
+typedef struct Filemagic Filemagic;
+struct Filemagic {
+ ulong x;
+ ulong mask;
+ char *desc;
+ char *mime;
+};
+
+/*
+ * integers in this table must be as seen on a little-endian machine
+ * when read from a file.
+ */
+Filemagic long0tab[] = {
+ 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file", OCTET,
+ /* "pac1" */
+ 0x31636170, 0xFFFFFFFF, "pac3 audio file", OCTET,
+ /* "pXc2 */
+ 0x32630070, 0xFFFF00FF, "pac4 audio file", OCTET,
+ 0xBA010000, 0xFFFFFFFF, "mpeg system stream", OCTET,
+ 0x43614c66, 0xFFFFFFFF, "FLAC audio file", "audio/flac",
+ 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable", OCTET,
+ 0x04034B50, 0xFFFFFFFF, "zip archive", "application/zip",
+ 070707, 0xFFFF, "cpio archive", "application/x-cpio",
+ 0x2F7, 0xFFFF, "tex dvi", "application/dvi",
+ 0xfaff, 0xfeff, "mp3 audio", "audio/mpeg",
+ 0xf0ff, 0xf6ff, "aac audio", "audio/aac",
+ /* 0xfeedface: this could alternately be a Next Plan 9 boot image */
+ 0xcefaedfe, 0xFFFFFFFF, "32-bit power Mach-O executable", OCTET,
+ /* 0xfeedfacf */
+ 0xcffaedfe, 0xFFFFFFFF, "64-bit power Mach-O executable", OCTET,
+ /* 0xcefaedfe */
+ 0xfeedface, 0xFFFFFFFF, "386 Mach-O executable", OCTET,
+ /* 0xcffaedfe */
+ 0xfeedfacf, 0xFFFFFFFF, "amd64 Mach-O executable", OCTET,
+ /* 0xcafebabe */
+ 0xbebafeca, 0xFFFFFFFF, "Mach-O universal executable", OCTET,
+ /*
+ * venti & fossil magic numbers are stored big-endian on disk,
+ * thus the numbers appear reversed in this table.
+ */
+ 0xad4e5cd1, 0xFFFFFFFF, "venti arena", OCTET,
+ 0x2bb19a52, 0xFFFFFFFF, "paq archive", OCTET,
+ 0x1a53454e, 0xFFFFFFFF, "NES ROM", OCTET,
+ /* tcpdump pcap file */
+ 0xa1b2c3d4, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
+ 0xd4c3b2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
+ 0xa1b23c4d, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
+ 0x4d3cb2a1, 0xFFFFFFFF, "pcap file", "application/vnd.tcpdump.pcap",
+};
+
+int
+filemagic(Filemagic *tab, int ntab, ulong x)
+{
+ int i;
+
+ for(i=0; i<ntab; i++)
+ if((x&tab[i].mask) == tab[i].x){
+ print("%s\n", mime ? tab[i].mime : tab[i].desc);
+ return 1;
+ }
+ return 0;
+}
+
+int
+long0(void)
+{
+ return filemagic(long0tab, nelem(long0tab), LENDIAN(buf));
+}
+
+typedef struct Fileoffmag Fileoffmag;
+struct Fileoffmag {
+ ulong off;
+ Filemagic;
+};
+
+/*
+ * integers in this table must be as seen on a little-endian machine
+ * when read from a file.
+ */
+Fileoffmag longofftab[] = {
+ /*
+ * venti & fossil magic numbers are stored big-endian on disk,
+ * thus the numbers appear reversed in this table.
+ */
+ 256*1024, 0xe7a5e4a9, 0xFFFFFFFF, "venti arenas partition", OCTET,
+ 256*1024, 0xc75e5cd1, 0xFFFFFFFF, "venti index section", OCTET,
+ 128*1024, 0x89ae7637, 0xFFFFFFFF, "fossil write buffer", OCTET,
+ 4, 0x31647542, 0xFFFFFFFF, "OS X finder properties", OCTET,
+ 0x100, 0x41474553, 0xFFFFFFFF, "SEGA ROM", OCTET,
+ 0x1fc, 0xAA550000, 0xFFFF0000, "bootable disk image", OCTET,
+};
+
+int
+fileoffmagic(Fileoffmag *tab, int ntab)
+{
+ int i;
+ ulong x;
+ Fileoffmag *tp;
+ uchar buf[sizeof(long)];
+
+ for(i=0; i<ntab; i++) {
+ tp = tab + i;
+ seek(fd, tp->off, 0);
+ if (readn(fd, buf, sizeof buf) != sizeof buf)
+ continue;
+ x = LENDIAN(buf);
+ if((x&tp->mask) == tp->x){
+ print("%s\n", mime ? tp->mime : tp->desc);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+longoff(void)
+{
+ return fileoffmagic(longofftab, nelem(longofftab));
+}
+
+int
+isexec(void)
+{
+ Fhdr f;
+
+ seek(fd, 0, 0); /* reposition to start of file */
+ if(crackhdr(fd, &f)) {
+ print("%s\n", mime ? OCTET : f.name);
+ return 1;
+ }
+ return 0;
+}
+
+/* executable scripts */
+int
+isexecscript(void)
+{
+ char tmp[128+1], *p;
+
+ if (memcmp("#!", buf, 2) != 0)
+ return 0;
+ memmove(tmp, buf+2, sizeof(tmp) - 1);
+ tmp[sizeof(tmp) - 1] = 0;
+ if ((p = strchr(tmp, '\n')) != nil)
+ *p = 0;
+ if ((p = strpbrk(tmp, " \t")) != nil)
+ *p = 0;
+ if ((p = strrchr(tmp, '/')) != nil)
+ p++;
+ else
+ p = tmp;
+
+ if (strcmp("rc", p) == 0)
+ print("%s\n", mime ? PLAIN : "rc executable file");
+ else if (strcmp("sh", p) == 0)
+ print("%s\n", mime ? "application/x-sh" : "sh executable file");
+ else if (strcmp("bash", p) == 0)
+ print("%s\n", mime ? "application/x-sh" : "bash executable file");
+ else if (strcmp("awk", p) == 0)
+ print("%s\n", mime ? PLAIN : "awk executable file");
+ else if (strcmp("sed", p) == 0)
+ print("%s\n", mime ? PLAIN : "sed executable file");
+ else if (strcmp("perl", p) == 0)
+ print("%s\n", mime ? PLAIN : "perl executable file");
+ else
+ print("%s\n", mime ? PLAIN : "unknown executable file");
+ return 1;
+}
+
+/* from tar.c */
+enum { NAMSIZ = 100, TBLOCK = 512 };
+
+union hblock
+{
+ char dummy[TBLOCK];
+ struct header
+ {
+ char name[NAMSIZ];
+ char mode[8];
+ char uid[8];
+ char gid[8];
+ char size[12];
+ char mtime[12];
+ char chksum[8];
+ char linkflag;
+ char linkname[NAMSIZ];
+ /* rest are defined by POSIX's ustar format; see p1003.2b */
+ char magic[6]; /* "ustar" */
+ char version[2];
+ char uname[32];
+ char gname[32];
+ char devmajor[8];
+ char devminor[8];
+ char prefix[155]; /* if non-null, path = prefix "/" name */
+ } dbuf;
+};
+
+int
+checksum(union hblock *hp)
+{
+ int i;
+ char *cp;
+ struct header *hdr = &hp->dbuf;
+
+ for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp++)
+ *cp = ' ';
+ i = 0;
+ for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++)
+ i += *cp & 0xff;
+ return i;
+}
+
+int
+istar(void)
+{
+ int chksum;
+ char tblock[TBLOCK];
+ union hblock *hp = (union hblock *)tblock;
+ struct header *hdr = &hp->dbuf;
+
+ seek(fd, 0, 0); /* reposition to start of file */
+ if (readn(fd, tblock, sizeof tblock) != sizeof tblock)
+ return 0;
+ chksum = strtol(hdr->chksum, 0, 8);
+ if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
+ if (strcmp(hdr->magic, "ustar") == 0)
+ print(mime? "application/x-ustar\n": "posix tar archive\n");
+ else
+ print(mime? "application/x-tar\n": "tar archive\n");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * initial words to classify file
+ */
+struct FILE_STRING
+{
+ char *key;
+ char *filetype;
+ int length;
+ char *mime;
+} file_string[] =
+{
+ "\x1f\x9d", "compressed", 2, "application/x-compress",
+ "\x1f\x8b", "gzip compressed", 2, "application/x-gzip",
+ "BZh", "bzip2 compressed", 3, "application/x-bzip2",
+ "!<arch>\n__.SYMDEF", "archive random library", 16, OCTET,
+ "!<arch>\n", "archive", 8, OCTET,
+ "070707", "cpio archive - ascii header", 6, OCTET,
+ "QFI\xfb", "QCOW disk image", 4, OCTET,
+ "%!", "postscript", 2, "application/postscript",
+ "\004%!", "postscript", 3, "application/postscript",
+ "x T post", "troff output for post", 8, "application/troff",
+ "x T Latin1", "troff output for Latin1", 10, "application/troff",
+ "x T utf", "troff output for UTF", 7, "application/troff",
+ "x T 202", "troff output for 202", 7, "application/troff",
+ "x T aps", "troff output for aps", 7, "application/troff",
+ "x T ", "troff output", 4, "application/troff",
+ "GIF", "GIF image", 3, "image/gif",
+ "\0PC Research, Inc\0", "ghostscript fax file", 18, "application/ghostscript",
+ "%PDF", "PDF", 4, "application/pdf",
+ "<!DOCTYPE", "HTML file", 9, "text/html",
+ "<!doctype", "HTML file", 9, "text/html",
+ "<!--", "XML file", 4, "text/xml",
+ "<html>", "HTML file", 6, "text/html",
+ "<HTML>", "HTML file", 6, "text/html",
+ "<?xml", "HTML file", 5, "text/html",
+ "\111\111\052\000", "tiff", 4, "image/tiff",
+ "\115\115\000\052", "tiff", 4, "image/tiff",
+ "\377\330\377\340", "jpeg", 4, "image/jpeg",
+ "\377\330\377\341", "jpeg", 4, "image/jpeg",
+ "\377\330\377\333", "jpeg", 4, "image/jpeg",
+ "\xff\xd8", "jpeg", 2, "image/jpeg",
+ "BM", "bmp", 2, "image/bmp",
+ "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office document", 8, "application/doc",
+ "<MakerFile ", "FrameMaker file", 11, "application/framemaker",
+ "\033E\033", "HP PCL printer data", 3, OCTET,
+ "\033&", "HP PCL printer data", 2, OCTET,
+ "\033%-12345X", "HPJCL file", 9, "application/hpjcl",
+ "\033Lua", "Lua bytecode", 4, OCTET,
+ "ID3", "mp3 audio with id3", 3, "audio/mpeg",
+ ".snd", "sun audio", 4, "audio/basic",
+ "\211PNG", "PNG image", 4, "image/png",
+ "P1\n", "ppm", 3, "image/ppm",
+ "P2\n", "ppm", 3, "image/ppm",
+ "P3\n", "ppm", 3, "image/ppm",
+ "P4\n", "ppm", 3, "image/ppm",
+ "P5\n", "ppm", 3, "image/ppm",
+ "P6\n", "ppm", 3, "image/ppm",
+ "/* XPM */\n", "xbm", 10, "image/xbm",
+ ".HTML ", "troff -ms input", 6, "text/troff",
+ ".LP", "troff -ms input", 3, "text/troff",
+ ".ND", "troff -ms input", 3, "text/troff",
+ ".PP", "troff -ms input", 3, "text/troff",
+ ".TL", "troff -ms input", 3, "text/troff",
+ ".TR", "troff -ms input", 3, "text/troff",
+ ".TH", "manual page", 3, "text/troff",
+ ".\\\"", "troff input", 3, "text/troff",
+ ".de", "troff input", 3, "text/troff",
+ ".if", "troff input", 3, "text/troff",
+ ".nr", "troff input", 3, "text/troff",
+ ".tr", "troff input", 3, "text/troff",
+ "vac:", "venti score", 4, PLAIN,
+ "-----BEGIN CERTIFICATE-----\n",
+ "pem certificate", -1, PLAIN,
+ "-----BEGIN TRUSTED CERTIFICATE-----\n",
+ "pem trusted certificate", -1, PLAIN,
+ "-----BEGIN X509 CERTIFICATE-----\n",
+ "pem x.509 certificate", -1, PLAIN,
+ "subject=/C=", "pem certificate with header", -1, PLAIN,
+ "process snapshot ", "process snapshot", -1, "application/snapfs",
+ "d8:announce", "torrent file", 11, "application/x-bittorrent",
+ "[playlist]", "playlist", 10, "application/x-scpls",
+ "#EXTM3U", "playlist", 7, "audio/x-mpegurl",
+ "BEGIN:VCARD\r\n", "vCard", 13, "text/directory;profile=vcard",
+ "BEGIN:VCARD\n", "vCard", 12, "text/directory;profile=vcard",
+ "AT&T", "DjVu document", 4, "image/vnd.djvu",
+ "Extended module: ", "XM audio", 17, "audio/xm",
+ "MThd", "midi audio", 4, "audio/midi",
+ "MUS\x1a", "mus audio", 4, "audio/mus",
+ "Creative Voice File\x1a", "voc audio", 20, "audio/x-voc",
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xaa\x99\x55\x66", "Xilinx bitstream (not byteswappped)", 52, OCTET,
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xbb\x00\x00\x00\x44\x00\x22\x11\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\x66\x55\x99\xaa", "Xilinx bitstream (byteswappped)", 52, OCTET,
+ 0,0,0,0
+};
+
+int
+istring(void)
+{
+ int i, l;
+ struct FILE_STRING *p;
+
+ for(p = file_string; p->key; p++) {
+ l = p->length;
+ if(l == -1)
+ l = strlen(p->key);
+ if(nbuf >= l && memcmp(buf, p->key, l) == 0) {
+ print("%s\n", mime ? p->mime : p->filetype);
+ return 1;
+ }
+ }
+ if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */
+ for(i = 5; i < nbuf; i++)
+ if(buf[i] == '\n')
+ break;
+ if(mime)
+ print("%s\n", OCTET);
+ else
+ print("%.*s picture\n", utfnlen((char*)buf+5, i-5), (char*)buf+5);
+ return 1;
+ }
+ return 0;
+}
+
+struct offstr
+{
+ ulong off;
+ struct FILE_STRING;
+} offstrs[] = {
+ 32*1024, "\001CD001\001", "ISO9660 CD image", 7, "application/x-iso9660-image",
+ 32*4, "DICM", "DICOM medical imaging data", 4, "application/dicom",
+ 1080, "M.K.", "Amiga module", 4, "audio/mod",
+ 0, 0, 0, 0, 0
+};
+
+int
+isoffstr(void)
+{
+ int n;
+ char buf[256];
+ struct offstr *p;
+
+ for(p = offstrs; p->key; p++) {
+ seek(fd, p->off, 0);
+ n = p->length;
+ if (n > sizeof buf)
+ n = sizeof buf;
+ if (readn(fd, buf, n) != n)
+ continue;
+ if(memcmp(buf, p->key, n) == 0) {
+ print("%s\n", mime ? p->mime : p->filetype);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+iff(void)
+{
+ if (strncmp((char*)buf, "FORM", 4) == 0 &&
+ strncmp((char*)buf+8, "AIFF", 4) == 0) {
+ print("%s\n", mime? "audio/x-aiff": "aiff audio");
+ return 1;
+ }
+ if (strncmp((char*)buf, "RIFF", 4) == 0) {
+ if (strncmp((char*)buf+8, "WAVE", 4) == 0)
+ print("%s\n", mime? "audio/wave": "wave audio");
+ else if (strncmp((char*)buf+8, "AVI ", 4) == 0)
+ print("%s\n", mime? "video/avi": "avi video");
+ else if (strncmp((char*)buf+8, "WEBP", 4) == 0)
+ print("%s\n", mime? "image/webp": "webp image");
+ else
+ print("%s\n", mime? OCTET : "riff file");
+ return 1;
+ }
+ return 0;
+}
+
+char* html_string[] = {
+ "blockquote",
+ "!DOCTYPE", "![CDATA[", "basefont", "frameset", "noframes", "textarea",
+ "caption",
+ "button", "center", "iframe", "object", "option", "script",
+ "select", "strong",
+ "blink", "embed", "frame", "input", "label", "param", "small",
+ "style", "table", "tbody", "tfoot", "thead", "title",
+ "?xml", "body", "code", "font", "form", "head", "html",
+ "link", "menu", "meta", "span",
+ "!--", "big", "dir", "div", "img", "pre", "sub", "sup",
+ "br", "dd", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5",
+ "h6", "hr", "li", "ol", "td", "th", "tr", "tt", "ul",
+ "a", "b", "i", "p", "q", "u",
+ 0,
+};
+
+int
+isudiff(void)
+{
+ char *p;
+
+ p = (char*)buf;
+ if((p = strstr(p, "diff")) != nil)
+ if((p = strchr(p, '\n')) != nil)
+ if(strncmp(++p, "--- ", 4) == 0)
+ if((p = strchr(p, '\n')) != nil)
+ if(strncmp(++p, "+++ ", 4) == 0)
+ if((p = strchr(p, '\n')) != nil)
+ if(strncmp(++p, "@@ ", 3) == 0){
+ print("%s\n", mime ? "text/plain" : "unified diff output");
+ return 1;
+ }
+ return 0;
+}
+
+int
+ishtml(void)
+{
+ int i, n, count;
+ uchar *p;
+
+ count = 0;
+ p = buf;
+ for(;;) {
+ while(p < buf+nbuf && *p != '<')
+ p++;
+ p++;
+ if (p >= buf+nbuf)
+ break;
+ if(*p == '/')
+ p++;
+ if(p >= buf+nbuf)
+ break;
+ for(i = 0; html_string[i]; i++){
+ n = strlen(html_string[i]);
+ if(p + n > buf+nbuf)
+ continue;
+ if(cistrncmp(html_string[i], (char*)p, n) == 0) {
+ p += n;
+ if(p < buf+nbuf && strchr("\t\r\n />", *p)){
+ if(++count > 2) {
+ print("%s\n", mime ? "text/html" : "HTML file");
+ return 1;
+ }
+ }
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+char* rfc822_string[] =
+{
+ "from:",
+ "date:",
+ "to:",
+ "subject:",
+ "received:",
+ "reply to:",
+ "sender:",
+ 0,
+};
+
+int
+isrfc822(void)
+{
+
+ char *p, *q, *r;
+ int i, count;
+
+ count = 0;
+ p = (char*)buf;
+ for(;;) {
+ q = strchr(p, '\n');
+ if(q == nil)
+ break;
+ *q = 0;
+ if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ")){
+ count++;
+ *q = '\n';
+ p = q+1;
+ continue;
+ }
+ *q = '\n';
+ if(*p != '\t' && *p != ' '){
+ r = strchr(p, ':');
+ if(r == 0 || r > q)
+ break;
+ for(i = 0; rfc822_string[i]; i++) {
+ if(cistrncmp(p, rfc822_string[i], strlen(rfc822_string[i])) == 0){
+ count++;
+ break;
+ }
+ }
+ }
+ p = q+1;
+ }
+ if(count >= 3){
+ print("%s\n", mime ? "message/rfc822" : "email file");
+ return 1;
+ }
+ return 0;
+}
+
+int
+ismbox(void)
+{
+ char *p, *q;
+
+ p = (char*)buf;
+ q = strchr(p, '\n');
+ if(q == nil)
+ return 0;
+ *q = 0;
+ if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == nil){
+ print("%s\n", mime ? "application/mbox" : "mail box");
+ return 1;
+ }
+ *q = '\n';
+ return 0;
+}
+
+int
+iscint(void)
+{
+ int type;
+ char *name;
+ Biobuf b;
+
+ if(Binit(&b, fd, OREAD) == Beof)
+ return 0;
+ seek(fd, 0, 0);
+ type = objtype(&b, &name);
+ if(type < 0)
+ return 0;
+ if(mime)
+ print("%s\n", OCTET);
+ else
+ print("%s intermediate\n", name);
+ return 1;
+}
+
+int
+isc(void)
+{
+ int n;
+
+ n = wfreq[I1];
+ /*
+ * includes
+ */
+ if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
+ goto yes;
+ if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] >= n)
+ goto yes;
+ /*
+ * declarations
+ */
+ if(wfreq[Cword] >= 5 && cfreq[';'] >= 5)
+ goto yes;
+ /*
+ * assignments
+ */
+ if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1)
+ goto yes;
+ return 0;
+
+yes:
+ if(mime){
+ print("%s\n", PLAIN);
+ return 1;
+ }
+ if(wfreq[Alword] > 0)
+ print("alef program\n");
+ else
+ print("c program\n");
+ return 1;
+}
+
+int
+islimbo(void)
+{
+ /*
+ * includes
+ */
+ if(wfreq[Lword] < 4)
+ return 0;
+ print("%s\n", mime ? PLAIN : "limbo program");
+ return 1;
+}
+
+int
+isas(void)
+{
+ /*
+ * includes
+ */
+ if(wfreq[Aword] < 2)
+ return 0;
+ print("%s\n", mime ? PLAIN : "as program");
+ return 1;
+}
+
+int
+istga(void)
+{
+ uchar *p;
+
+ p = buf;
+ if(nbuf < 18)
+ return 0;
+ if((p[12] | p[13]<<8) == 0) /* width */
+ return 0;
+ if((p[14] | p[15]<<8) == 0) /* height */
+ return 0;
+ if(p[16] != 8 && p[16] != 15 && p[16] != 16 && p[16] != 24 && p[16] != 32) /* bpp */
+ return 0;
+ if(((p[2]|(1<<3)) & (~3)) != (1<<3)) /* rle flag */
+ return 0;
+ if(p[1] == 0){ /* non color-mapped */
+ if((p[2]&3) != 2 && (p[2]&3) != 3)
+ return 0;
+ if((p[5] | p[6]<<8) != 0) /* palette length */
+ return 0;
+ } else
+ if(p[1] == 1){ /* color-mapped */
+ if((p[2]&3) != 1 || p[7] == 0)
+ return 0;
+ if((p[5] | p[6]<<8) == 0) /* palette length */
+ return 0;
+ } else
+ return 0;
+ print("%s\n", mime ? "image/tga" : "targa image");
+ return 1;
+}
+
+int
+ismp3(void)
+{
+ uchar *p, *e;
+
+ p = buf;
+ e = p + nbuf-1;
+ while((p < e) && (p = memchr(p, 0xFF, e - p))){
+ if((p[1] & 0xFE) == 0xFA){
+ print("%s\n", mime ? "audio/mpeg" : "mp3 audio");
+ return 1;
+ }
+ p++;
+ }
+ return 0;
+}
+
+int
+ismp4(void)
+{
+ if(nbuf <= 12)
+ return 0;
+ if(memcmp(&buf[4], "ftyp", 4) != 0)
+ return 0;
+ if(memcmp(&buf[8], "isom", 4) == 0 || memcmp(&buf[8], "mp4", 3) == 0){
+ print("%s\n", mime ? "video/mp4" : "mp4 video");
+ return 1;
+ }
+ if(memcmp(&buf[8], "M4A ", 4) == 0){
+ print("%s\n", mime ? "audio/m4a" : "m4a audio");
+ return 1;
+ }
+ return 0;
+}
+
+int
+isoggvorbis(void)
+{
+ if(memcmp(&buf[0], "OggS", 4) != 0)
+ return 0;
+ if(memcmp(&buf[29], "vorbis", 6) != 0)
+ return 0;
+ print("%s\n", mime ? "audio/ogg;codecs=vorbis" : "ogg audio");
+ return 1;
+}
+
+int
+isoggopus(void)
+{
+ if(memcmp(&buf[0], "OggS", 4) != 0)
+ return 0;
+ if(memcmp(&buf[28], "OpusHead", 8) != 0)
+ return 0;
+ print("%s\n", mime ? "audio/ogg;codecs=opus" : "ogg audio");
+ return 1;
+}
+
+/*
+ * low entropy means encrypted
+ */
+int
+ismung(void)
+{
+ int i, bucket[8];
+ float cs;
+
+ if(nbuf < 64)
+ return 0;
+ memset(bucket, 0, sizeof(bucket));
+ for(i=nbuf-64; i<nbuf; i++)
+ bucket[(buf[i]>>5)&07] += 1;
+
+ cs = 0.;
+ for(i=0; i<8; i++)
+ cs += (bucket[i]-8)*(bucket[i]-8);
+ cs /= 8.;
+ if(cs <= 24.322) {
+ if(buf[0]==0x1f && buf[1]==0x9d)
+ print("%s\n", mime ? "application/x-compress" : "compressed");
+ else
+ if(buf[0]==0x1f && buf[1]==0x8b)
+ print("%s\n", mime ? "application/x-gzip" : "gzip compressed");
+ else
+ if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
+ print("%s\n", mime ? "application/x-bzip2" : "bzip2 compressed");
+ else
+ if(buf[0]==0x78 && buf[1]==0x9c)
+ print("%s\n", mime ? "application/x-deflate" : "zlib compressed");
+ else
+ print("%s\n", mime ? OCTET : "encrypted");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * english by punctuation and frequencies
+ */
+int
+isenglish(void)
+{
+ int vow, comm, rare, badpun, punct;
+ char *p;
+
+ if(guess != Fascii && guess != Feascii)
+ return 0;
+ badpun = 0;
+ punct = 0;
+ for(p = (char *)buf; p < (char *)buf+nbuf-1; p++)
+ switch(*p) {
+ case '.':
+ case ',':
+ case ')':
+ case '%':
+ case ';':
+ case ':':
+ case '?':
+ punct++;
+ if(p[1] != ' ' && p[1] != '\n')
+ badpun++;
+ }
+ if(badpun*5 > punct)
+ return 0;
+ if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shell file test */
+ return 0;
+ if(2*cfreq[';'] > cfreq['e'])
+ return 0;
+
+ vow = 0;
+ for(p="AEIOU"; *p; p++) {
+ vow += cfreq[*p];
+ vow += cfreq[tolower(*p)];
+ }
+ comm = 0;
+ for(p="ETAION"; *p; p++) {
+ comm += cfreq[*p];
+ comm += cfreq[tolower(*p)];
+ }
+ rare = 0;
+ for(p="VJKQXZ"; *p; p++) {
+ rare += cfreq[*p];
+ rare += cfreq[tolower(*p)];
+ }
+ if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) {
+ print("%s\n", mime ? PLAIN : "English text");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * pick up a number with
+ * syntax _*[0-9]+_
+ */
+#define P9BITLEN 12
+int
+p9bitnum(char *s, int *v)
+{
+ char *es;
+
+ if(s[P9BITLEN-1] != ' ')
+ return -1;
+ s[P9BITLEN-1] = '\0';
+ *v = strtol(s, &es, 10);
+ s[P9BITLEN-1] = ' ';
+ if(es != &s[P9BITLEN-1])
+ return -1;
+ return 0;
+}
+
+int
+depthof(char *s, int *newp)
+{
+ char *es;
+ int d;
+
+ *newp = 0;
+ es = s+12;
+ while(s<es && *s==' ')
+ s++;
+ if(s == es)
+ return -1;
+ if('0'<=*s && *s<='9')
+ return 1<<strtol(s, nil, 0);
+
+ *newp = 1;
+ d = 0;
+ while(s<es && *s!=' '){
+ if(strchr("rgbkamx", *s) == nil)
+ return -1;
+ s++;
+ if('0'<=*s && *s<='9')
+ d += strtoul(s, &s, 10);
+ else
+ return -1;
+ }
+
+ if(d % 8 == 0 || 8 % d == 0)
+ return d;
+ else
+ return -1;
+}
+
+int
+isp9bit(void)
+{
+ int dep, lox, loy, hix, hiy, px, new, cmpr;
+ long len;
+ char *newlabel;
+ uchar *cp;
+
+ cp = buf;
+ cmpr = 0;
+ if(memcmp(cp, "compressed\n", 11) == 0) {
+ cmpr = 1;
+ cp = buf + 11;
+ }
+
+ if((dep = depthof((char*)cp + 0*P9BITLEN, &new)) < 0)
+ return 0;
+ newlabel = new ? "" : "old ";
+ if(p9bitnum((char*)cp + 1*P9BITLEN, &lox) < 0)
+ return 0;
+ if(p9bitnum((char*)cp + 2*P9BITLEN, &loy) < 0)
+ return 0;
+ if(p9bitnum((char*)cp + 3*P9BITLEN, &hix) < 0)
+ return 0;
+ if(p9bitnum((char*)cp + 4*P9BITLEN, &hiy) < 0)
+ return 0;
+
+ hix -= lox;
+ hiy -= loy;
+ if(hix <= 0 || hiy <= 0)
+ return 0;
+
+ if(dep < 8){
+ px = 8/dep; /* pixels per byte */
+ /* set l to number of bytes of data per scan line */
+ len = (hix+px-1)/px;
+ }else
+ len = hix*dep/8;
+ len *= hiy; /* col length */
+ len += 5 * P9BITLEN; /* size of initial ascii */
+
+ /*
+ * for compressed images, don't look any further. otherwise:
+ * for image file, length is non-zero and must match calculation above.
+ * for /dev/window and /dev/screen the length is always zero.
+ * for subfont, the subfont header should follow immediately.
+ */
+ if (cmpr) {
+ print(mime ? "image/p9bit\n" : "Compressed %splan 9 image or subfont, depth %d, size %dx%d\n",
+ newlabel, dep, hix, hiy);
+ return 1;
+ }
+ /*
+ * mbuf->length == 0 probably indicates reading a pipe.
+ * Ghostscript sometimes produces a little extra on the end.
+ */
+ if (len != 0 && (mbuf->length == 0 || mbuf->length == len ||
+ mbuf->length > len && mbuf->length < len+P9BITLEN)) {
+ print(mime ? "image/p9bit\n" : "%splan 9 image, depth %d, size %dx%d\n",
+ newlabel, dep, hix, hiy);
+ return 1;
+ }
+ if (p9subfont(buf+len)) {
+ print(mime ? "image/p9bit\n" : "%ssubfont file, depth %d, size %dx%d\n",
+ newlabel, dep, hix, hiy);
+ return 1;
+ }
+ return 0;
+}
+
+int
+p9subfont(uchar *p)
+{
+ int n, h, a;
+
+ /* if image too big, assume it's a subfont */
+ if (p+3*P9BITLEN > buf+sizeof(buf))
+ return 1;
+
+ if (p9bitnum((char*)p + 0*P9BITLEN, &n) < 0) /* char count */
+ return 0;
+ if (p9bitnum((char*)p + 1*P9BITLEN, &h) < 0) /* height */
+ return 0;
+ if (p9bitnum((char*)p + 2*P9BITLEN, &a) < 0) /* ascent */
+ return 0;
+ if(n > 0 && h > 0 && a >= 0)
+ return 1;
+ return 0;
+}
+
+#define WHITESPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
+
+int
+isp9font(void)
+{
+ uchar *cp, *p;
+ int i, n;
+ char pathname[1024];
+
+ cp = buf;
+ if (!getfontnum(cp, &cp)) /* height */
+ return 0;
+ if (!getfontnum(cp, &cp)) /* ascent */
+ return 0;
+ for (i = 0; cp=(uchar*)strchr((char*)cp, '\n'); i++) {
+ if (!getfontnum(cp, &cp)) /* min */
+ break;
+ if (!getfontnum(cp, &cp)) /* max */
+ return 0;
+ getfontnum(cp, &cp); /* optional offset */
+ while (WHITESPACE(*cp))
+ cp++;
+ for (p = cp; *cp && !WHITESPACE(*cp); cp++)
+ ;
+ /* construct a path name, if needed */
+ n = 0;
+ if (*p != '/' && slash) {
+ n = slash-fname+1;
+ if (n < sizeof(pathname))
+ memcpy(pathname, fname, n);
+ else n = 0;
+ }
+ if (n+cp-p+4 < sizeof(pathname)) {
+ memcpy(pathname+n, p, cp-p);
+ n += cp-p;
+ pathname[n] = 0;
+ if (access(pathname, AEXIST) < 0) {
+ strcpy(pathname+n, ".0");
+ if (access(pathname, AEXIST) < 0)
+ return 0;
+ }
+ }
+ }
+ if (i) {
+ print("%s\n", mime ? PLAIN : "font file");
+ return 1;
+ }
+ return 0;
+}
+
+int
+getfontnum(uchar *cp, uchar **rp)
+{
+ while (WHITESPACE(*cp)) /* extract ulong delimited by whitespace */
+ cp++;
+ if (*cp < '0' || *cp > '9')
+ return 0;
+ strtoul((char *)cp, (char **)rp, 0);
+ if (!WHITESPACE(**rp)) {
+ *rp = cp;
+ return 0;
+ }
+ return 1;
+}
+
+int
+isrtf(void)
+{
+ if(strstr((char *)buf, "\\rtf1")){
+ print(mime ? "application/rtf\n" : "rich text format\n");
+ return 1;
+ }
+ return 0;
+}
+
+int
+ismsdos(void)
+{
+ if (buf[0] == 0x4d && buf[1] == 0x5a){
+ print(mime ? "application/x-msdownload\n" : "MSDOS executable\n");
+ return 1;
+ }
+ return 0;
+}
+
+int
+isicocur(void)
+{
+ if(buf[0] || buf[1] || buf[3] || buf[9])
+ return 0;
+ if(buf[4] == 0x00 && buf[5] == 0x00)
+ return 0;
+ switch(buf[2]){
+ case 1:
+ print(mime ? "image/x-icon\n" : "Microsoft icon file\n");
+ return 1;
+ case 2:
+ print(mime ? "image/x-icon\n" : "Microsoft cursor file\n");
+ return 1;
+ }
+ return 0;
+}
+
+int
+iself(void)
+{
+ static char *cpu[] = { /* NB: incomplete and arbitary list */
+ [1] "WE32100",
+ [2] "SPARC",
+ [3] "i386",
+ [4] "M68000",
+ [5] "M88000",
+ [6] "i486",
+ [7] "i860",
+ [8] "R3000",
+ [9] "S370",
+ [10] "R4000",
+ [15] "HP-PA",
+ [18] "sparc v8+",
+ [19] "i960",
+ [20] "PPC-32",
+ [21] "PPC-64",
+ [40] "ARM",
+ [41] "Alpha",
+ [43] "sparc v9",
+ [50] "IA-64",
+ [62] "AMD64",
+ [75] "VAX",
+ };
+ static char *type[] = {
+ [1] "relocatable object",
+ [2] "executable",
+ [3] "shared library",
+ [4] "core dump",
+ };
+
+ if (memcmp(buf, "\x7fELF", 4) == 0){
+ if (!mime){
+ int isdifend = 0;
+ int n = (buf[19] << 8) | buf[18];
+ char *p = "unknown";
+ char *t = "unknown";
+
+ if (n > 0 && n < nelem(cpu) && cpu[n])
+ p = cpu[n];
+ else {
+ /* try the other byte order */
+ isdifend = 1;
+ n = (buf[18] << 8) | buf[19];
+ if (n > 0 && n < nelem(cpu) && cpu[n])
+ p = cpu[n];
+ }
+ if(isdifend)
+ n = (buf[16]<< 8) | buf[17];
+ else
+ n = (buf[17]<< 8) | buf[16];
+
+ if(n>0 && n < nelem(type) && type[n])
+ t = type[n];
+ print("%s ELF %s\n", p, t);
+ }
+ else
+ print("application/x-elf-executable\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+isface(void)
+{
+ int i, j, ldepth, l;
+ char *p;
+
+ ldepth = -1;
+ for(j = 0; j < 3; j++){
+ for(p = (char*)buf, i=0; i<3; i++){
+ if(p[0] != '0' || p[1] != 'x')
+ return 0;
+ if(buf[2+8] == ',')
+ l = 2;
+ else if(buf[2+4] == ',')
+ l = 1;
+ else
+ return 0;
+ if(ldepth == -1)
+ ldepth = l;
+ if(l != ldepth)
+ return 0;
+ strtoul(p, &p, 16);
+ if(*p++ != ',')
+ return 0;
+ while(*p == ' ' || *p == '\t')
+ p++;
+ }
+ if (*p++ != '\n')
+ return 0;
+ }
+
+ if(mime)
+ print("application/x-face\n");
+ else
+ print("face image depth %d\n", ldepth);
+ return 1;
+}