shithub: libtags

Download patch

ref: d8185e7273233384dcf32da043c28446082df1d2
author: Sigrid Solveig Haflínudóttir <ftrvxmtrx@gmail.com>
date: Wed Oct 9 15:21:55 EDT 2019

squash

diff: cannot open b/examples//null: file does not exist: 'b/examples//null' diff: cannot open b/src//null: file does not exist: 'b/src//null'
--- /dev/null
+++ b/.gitignore
@@ -1,0 +1,4 @@
+*.[1-9]
+*.o
+*.out
+result
--- /dev/null
+++ b/LICENSE
@@ -1,0 +1,18 @@
+Copyright © 2013-2019 Sigrid Solveig Haflínudóttir
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null
+++ b/README.md
@@ -1,0 +1,41 @@
+libtags
+=======
+
+A cross-platform library for reading tags, designed for highly constrained environments.
+
+Comparison to id3lib and taglib:
+
+|                | libtags         | id3lib           | taglib           |
+|:---------------|:----------------|:-----------------|:-----------------|
+| ID3v2.4        | yes             | no               | yes              |
+| Ogg/Vorbis     | yes             | no               | yes              |
+| FLAC           | yes             | no               | yes              |
+| m4a            | yes             | no               | yes              |
+| replay gain    | yes             | no               | ???              |
+| size           | tiny            | bloated          | more bloated     |
+| license        | MIT             | LGPL             | LGPL/MPL         |
+| written in     | C               | C++              | C++              |
+| memory         | no allocations  | allocates memory | allocates memory |
+| thread safe    | yes             | ???              | ???              |
+| speed          | ultra-fast      | slow             | fast             |
+| tag writing    | no, not a goal  | yes              | yes              |
+| Plan 9 support | yes, native     | no               | no               |
+
+CPU time (784 files: mp3, ogg, flac):
+
+|                | libtags          | taglib           |
+|:---------------|:-----------------|:-----------------|
+| files cached   | real    0m0.027s | real    0m0.155s |
+|                | user    0m0.014s | user    0m0.102s |
+|                | sys     0m0.012s | sys     0m0.053s |
+|                |                  |                  |
+| cache dropped  | real    0m1.158s | real    0m1.628s |
+|                | user    0m0.024s | user    0m0.211s |
+|                | sys     0m0.132s | sys     0m0.187s |
+
+## Usage
+
+Just compile it to an archive (`.a`) and link it into your program. Use it in your code
+by including `tags.h`, which is the whole API. Documentation is in the header.
+
+See `examples/readtags.c`.
--- /dev/null
+++ b/default.nix
@@ -1,0 +1,22 @@
+{ stdenv, mk, pkgconfig, fetchgitLocal }:
+
+stdenv.mkDerivation rec {
+  name = "libtags";
+  src = ./.;#fetchgitLocal ./.;
+
+  buildInputs = [ mk ];
+  propagatedBuildInputs = [ pkgconfig ];
+  enableParallelBuilding = true;
+
+  installPhase = ''
+    cd src && mk -f mkfile.nix install && cd ..
+    cd examples && mk -f mkfile.nix install INCLUDES=-I$out/include LIBS="-L$out/lib -ltags"
+  '';
+
+  meta = {
+    description = "A cross-platform library for reading tags, designed for highly constrained environments";
+    maintainers = with stdenv.lib.maintainers; [ ftrvxmtrx ];
+    platforms = stdenv.lib.platforms.unix;
+    license = stdenv.lib.licenses.mit;
+  };
+}
--- /dev/null
+++ b/examples/mkfile
@@ -1,0 +1,13 @@
+</$objtype/mkfile
+
+TARG=readtags
+
+OFILES=\
+	readtags.$O\
+	
+BIN=/$objtype/bin
+
+HFILES=\
+	/sys/include/tags.h\
+
+</sys/src/cmd/mkone
--- /dev/null
+++ b/examples/mkfile.nix
@@ -1,0 +1,11 @@
+TARG=readtags
+
+<$MKRULES/defaults
+
+OFILES=\
+	readtags.$O\
+
+HFILES=\
+	tags.h\
+
+<$MKRULES/mkone
--- /dev/null
+++ b/examples/readtags.c
@@ -1,0 +1,108 @@
+#ifdef __unix__
+#define _DEFAULT_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <unistd.h>
+#define print printf
+#define seek lseek
+#define nil NULL
+#define OREAD O_RDONLY
+#define USED(x) (void)x
+#else
+#include <u.h>
+#include <libc.h>
+#endif
+#include <tags.h>
+
+typedef struct Aux Aux;
+
+struct Aux /* per-file state reached by the read/seek callbacks through Tagctx.aux */
+{
+	int fd; /* descriptor of the file being scanned; main() stores the result of open() here */
+};
+
+static const char *t2s[] = /* printable label for each tag type, indexed by the T* constants from tags.h */
+{
+	[Tartist] = "artist",
+	[Talbum] = "album",
+	[Ttitle] = "title",
+	[Tdate] = "date",
+	[Ttrack] = "track",
+	[Talbumgain] = "albumgain",
+	[Talbumpeak] = "albumpeak",
+	[Ttrackgain] = "trackgain",
+	[Ttrackpeak] = "trackpeak",
+	[Tgenre] = "genre",
+	[Timage] = "image",
+};
+
+static void /* Tagctx.tag callback: prints one line per tag reported by libtags */
+cb(Tagctx *ctx, int t, const char *v, int offset, int size, Tagread f)
+{
+	USED(ctx); USED(f); USED(offset); USED(size);
+	if(t != Timage)
+		print("%-12s %s\n", t2s[t], v);
+	else /* images additionally show where the data sits in the file */
+		print("%-12s %s %d %d\n", t2s[t], v, offset, size);
+}
+
+/* Tagctx.read callback: forwards to read() on the descriptor kept in ctx->aux. */
+static int
+ctxread(Tagctx *ctx, void *buf, int cnt)
+{
+	return read(((Aux*)ctx->aux)->fd, buf, cnt);
+}
+
+/* Tagctx.seek callback: forwards to seek() on the descriptor kept in ctx->aux. */
+static int
+ctxseek(Tagctx *ctx, int offset, int whence)
+{
+	return seek(((Aux*)ctx->aux)->fd, offset, whence);
+}
+
+int
+main(int argc, char **argv) /* prints the tags and stream parameters of every file named on the command line */
+{
+	int i;
+	char buf[256]; /* work buffer lent to libtags through ctx.buf */
+	Aux aux;
+	Tagctx ctx =
+	{
+		.read = ctxread,
+		.seek = ctxseek,
+		.tag = cb,
+		.buf = buf,
+		.bufsz = sizeof(buf),
+		.aux = &aux,
+	};
+
+	if(argc < 2){
+		print("usage: readtags FILE...\n");
+		return -1;
+	}
+
+	for(i = 1; i < argc; i++){ /* a file that fails to open or parse does not stop the run */
+		print("*** %s\n", argv[i]);
+		if((aux.fd = open(argv[i], OREAD)) < 0)
+			print("failed to open\n");
+		else{
+			ctx.filename = argv[i];
+			if(tagsget(&ctx) != 0)
+				print("no tags or failed to read tags\n");
+			else{
+				if(ctx.duration > 0) /* fields that are not positive are not printed */
+					print("%-12s %d ms\n", "duration", ctx.duration);
+				if(ctx.samplerate > 0)
+					print("%-12s %d\n", "samplerate", ctx.samplerate);
+				if(ctx.channels > 0)
+					print("%-12s %d\n", "channels", ctx.channels);
+				if(ctx.bitrate > 0)
+					print("%-12s %d\n", "bitrate", ctx.bitrate);
+			}
+			close(aux.fd);
+		}
+		print("\n");
+	}
+	return 0;
+}
--- /dev/null
+++ b/src/8859.c
@@ -1,0 +1,29 @@
+/* http://en.wikipedia.org/wiki/ISO/IEC_8859-1 */
+#include "tagspriv.h"
+
+/* Converts at most sz ISO-8859-1 bytes of s (stopping at a NUL) into NUL-terminated UTF-8 in o, which has room for osz bytes. Returns the number of input bytes consumed. */
+int
+iso88591toutf8(uchar *o, int osz, const uchar *s, int sz)
+{
+	int i;
+	if(osz < 1) /* no room even for the terminator */
+		return 0;
+	for(i = 0; i < sz && osz > 1 && s[i] != 0; i++){
+		if(s[i] >= 0x80 && osz < 3) /* two bytes plus the terminator must fit */
+			break;
+		if(s[i] >= 0xc0){ /* U+00C0..U+00FF */
+			*o++ = 0xc3;
+			*o++ = s[i] - 0x40;
+			osz--;
+		}else if(s[i] >= 0x80){ /* U+0080..U+00BF; a raw 0x80..0x9f byte would not be valid UTF-8 */
+			*o++ = 0xc2;
+			*o++ = s[i];
+			osz--;
+		}else{
+			*o++ = s[i];
+		}
+		osz--;
+	}
+	*o = 0;
+	return i;
+}
--- /dev/null
+++ b/src/flac.c
@@ -1,0 +1,105 @@
+/* https://xiph.org/flac/format.html */
+#include "tagspriv.h"
+
+#define beu3(d)   ((d)[0]<<16 | (d)[1]<<8  | (d)[2]<<0) /* 24-bit big-endian */
+#define beuint(d) ((uint)(d)[0]<<24 | (uint)(d)[1]<<16 | (uint)(d)[2]<<8 | (uint)(d)[3]<<0) /* 32-bit big-endian; cast first so the top byte never shifts into an int's sign bit */
+#define leuint(d) ((uint)(d)[3]<<24 | (uint)(d)[2]<<16 | (uint)(d)[1]<<8 | (uint)(d)[0]<<0) /* 32-bit little-endian, same precaution */
+
+/* Reads FLAC metadata: stream parameters from the stream info, then every vorbis comment and picture block. Returns 0, or -1 on a malformed file or a failed read/seek. */
+int
+tagflac(Tagctx *ctx)
+{
+	uchar *d;
+	int sz, last;
+	uvlong g;
+
+	d = (uchar*)ctx->buf;
+	/* 8 bytes for marker, block type, length. 18 bytes for the stream info */
+	if(ctx->read(ctx, d, 8+18) != 8+18 || memcmp(d, "fLaC\x00", 5) != 0)
+		return -1;
+	sz = beu3(&d[5]); /* size of the stream info */
+	ctx->samplerate = beu3(&d[18]) >> 4;
+	ctx->channels = ((d[20]>>1) & 7) + 1;
+	g = (uvlong)(d[21] & 0xf)<<32 | (uvlong)beu3(&d[22])<<8 | d[25]; /* 36-bit sample count; widened before shifting so bit 31 is not sign-extended */
+	if(ctx->samplerate > 0) /* a zero rate would divide by zero */
+		ctx->duration = g * 1000 / ctx->samplerate;
+
+	/* skip the rest of the stream info */
+	if(ctx->seek(ctx, sz-18, 1) != 8+sz)
+		return -1;
+	for(last = 0; !last;){
+		if(ctx->read(ctx, d, 4) != 4)
+			return -1;
+		sz = beu3(&d[1]);
+		last = (d[0] & 0x80) != 0;
+
+		if((d[0] & 0x7f) == 6){ /* 6 = picture */
+			int n, offset;
+			char *mime;
+			if(sz < 32 || ctx->read(ctx, d, 8) != 8) /* type, mime length; 32 = all fixed-size fields */
+				return -1;
+			sz -= 8;
+			n = beuint(&d[4]);
+			mime = ctx->buf+20; /* bytes 0..19 stay free for the fixed-size fields read into d */
+			if(n < 0 || n > sz-24 || n >= ctx->bufsz-20 || ctx->read(ctx, mime, n) != n)
+				return -1;
+			mime[n] = 0;
+			sz -= n;
+			if(ctx->read(ctx, d, 4) != 4 || (n = beuint(d)) < 0 || n > sz-24 || ctx->seek(ctx, n, 1) < 0) /* description length, then skip the description */
+				return -1;
+			sz -= 4+n;
+			if(ctx->read(ctx, d, 20) != 20) /* width, height, depth, colors, data length */
+				return -1;
+			sz -= 20;
+			n = beuint(&d[16]);
+			if(n < 0 || n > sz || (offset = ctx->seek(ctx, 0, 1)) < 0)
+				return -1;
+			tagscallcb(ctx, Timage, mime, offset, n, nil);
+			if(ctx->seek(ctx, sz, 1) < 0) /* move past the image data to the next block */
+				return -1;
+		}else if((d[0] & 0x7f) == 4){ /* 4 = vorbis comment */
+			int i, numtags, tagsz, vensz;
+			char *k, *v;
+			if(sz < 8 || ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= 4;
+			vensz = leuint(d);
+			if(vensz < 0 || vensz > sz-4)
+				return -1;
+			/* skip vendor, read the number of tags */
+			if(ctx->seek(ctx, vensz, 1) < 0 || ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= vensz+4;
+			numtags = leuint(d);
+			for(i = 0; i < numtags && sz > 4; i++){
+				if(ctx->read(ctx, d, 4) != 4)
+					return -1;
+				tagsz = leuint(d);
+				sz -= 4;
+				if(tagsz < 0 || tagsz > sz)
+					return -1;
+				sz -= tagsz;
+				if(tagsz == 0 || tagsz+1 > ctx->bufsz){ /* empty or doesn't fit: ignore it */
+					if(ctx->seek(ctx, tagsz, 1) < 0)
+						return -1;
+					continue;
+				}
+				k = ctx->buf;
+				if(ctx->read(ctx, k, tagsz) != tagsz)
+					return -1;
+				if(k[tagsz-1] == '\r') /* some tags have a stupid '\r'; ignore */
+					k[tagsz-1] = 0;
+				k[tagsz] = 0;
+				if((v = strchr(k, '=')) != nil){
+					*v++ = 0;
+					cbvorbiscomment(ctx, k, v);
+				}
+			}
+			if(ctx->seek(ctx, sz, 1) < 0) /* skip whatever the loop left unread */
+				return -1;
+		}else if(ctx->seek(ctx, sz, 1) <= 0)
+			return -1;
+	}
+
+	return 0;
+}
--- /dev/null
+++ b/src/id3genres.c
@@ -1,0 +1,42 @@
+#include "tagspriv.h"
+
+const char *id3genres[Numgenre] = /* genre names indexed by the numeric ID3 genre code */
+{
+	"Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk",
+	"Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies",
+	"Other", "Pop", "Rhythm and Blues", "Rap", "Reggae", "Rock",
+	"Techno", "Industrial", "Alternative", "Ska", "Death Metal",
+	"Pranks", "Soundtrack", "Euro-Techno", "Ambient", "Trip-Hop",
+	"Vocal", "Jazz & Funk", "Fusion", "Trance", "Classical",
+	"Instrumental", "Acid", "House", "Game", "Sound Clip", "Gospel",
+	"Noise", "Alternative Rock", "Bass", "Soul", "Punk", "Space",
+	"Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic",
+	"Gothic", "Darkwave", "Techno-Industrial", "Electronic",
+	"Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy",
+	"Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk",
+	"Jungle", "Native American", "Cabaret", "New Wave", "Psychedelic",
+	"Rave", "Showtunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk",
+	"Acid Jazz", "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock",
+	"Folk", "Folk-Rock", "National Folk", "Swing", "Fast Fusion",
+	"Bebop", "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde",
+	"Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock",
+	"Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic",
+	"Humour", "Speech", "Chanson", "Opera", "Chamber Music", "Sonata",
+	"Symphony", "Booty Bass", "Primus", "Porn groove", "Satire", "Slow Jam",
+	"Club", "Tango", "Samba", "Folklore", "Ballad", "Power Ballad",
+	"Rhythmic Soul", "Freestyle", "Duet", "Punk rock", "Drum Solo", "A capella",
+	"Euro-House", "Dance Hall", "Goa Trance", "Drum & Bass",
+	"Club-House", "Hardcore Techno", "Terror", "Indie", "BritPop",
+	"Afro-punk", "Polsk Punk", "Beat", "Christian Gangsta Rap", "Heavy Metal",
+	"Black Metal", "Crossover", "Contemporary Christian", "Christian Rock",
+	"Merengue", "Salsa", "Thrash Metal", "Anime", "Jpop", "Synthpop",
+	"Abstract", "Art Rock", "Baroque", "Bhangra", "Big Beat",
+	"Breakbeat", "Chillout", "Downtempo", "Dub", "EBM", "Eclectic",
+	"Electro", "Electroclash", "Emo", "Experimental", "Garage",
+	"Global", "IDM", "Illbient", "Industro-Goth", "Jam Band",
+	"Krautrock", "Leftfield", "Lounge", "Math Rock", "New Romantic",
+	"Nu-Breakz", "Post-Punk", "Post-Rock", "Psytrance", "Shoegaze",
+	"Space Rock", "Trop Rock", "World Music", "Neoclassical",
+	"Audiobook", "Audio Theatre", "Neue Deutsche Welle", "Podcast",
+	"Indie Rock", "G-Funk", "Dubstep", "Garage Rock", "Psybient",
+};
--- /dev/null
+++ b/src/id3v1.c
@@ -1,0 +1,48 @@
+/*
+ * http://en.wikipedia.org/wiki/ID3
+ * Space-padded strings are mentioned there. This is wrong and is a lie.
+ */
+#include "tagspriv.h"
+
+enum
+{
+	Insz = 128, /* bytes read from the end of the file and checked for the "TAG" marker */
+	Outsz = 61, /* size of the output area handed to iso88591toutf8() and snprint() */
+};
+
+int
+tagid3v1(Tagctx *ctx) /* reads the trailing "TAG" record; returns -1 if it is absent or the buffer is too small, else 0 */
+{
+	uchar *in, *out;
+
+	if(ctx->bufsz < Insz+Outsz)
+		return -1;
+	in = (uchar*)ctx->buf; /* raw record */
+	out = in + Insz; /* converted text goes right behind it */
+
+	if(ctx->seek(ctx, -Insz, 2) < 0)
+		return -1;
+	if(ctx->read(ctx, in, Insz) != Insz || memcmp(in, "TAG", 3) != 0)
+		return -1;
+	/* each field is reported only when its bit in ctx->found is still clear */
+	if((ctx->found & 1<<Ttitle) == 0 && iso88591toutf8(out, Outsz, &in[3], 30) > 0)
+		txtcb(ctx, Ttitle, out);
+	if((ctx->found & 1<<Tartist) == 0 && iso88591toutf8(out, Outsz, &in[33], 30) > 0)
+		txtcb(ctx, Tartist, out);
+	if((ctx->found & 1<<Talbum) == 0 && iso88591toutf8(out, Outsz, &in[63], 30) > 0)
+		txtcb(ctx, Talbum, out);
+
+	in[93+4] = 0; /* terminate the 4 bytes at in[93..96] so they can be passed on as a string */
+	if((ctx->found & 1<<Tdate) == 0 && in[93] != 0)
+		txtcb(ctx, Tdate, &in[93]);
+
+	if((ctx->found & 1<<Ttrack) == 0 && in[125] == 0 && in[126] > 0){ /* track number is taken from in[126] only when in[125] is zero */
+		snprint((char*)out, Outsz, "%d", in[126]);
+		txtcb(ctx, Ttrack, out);
+	}
+
+	if((ctx->found & 1<<Tgenre) == 0 && in[127] < Numgenre) /* in[127] indexes id3genres */
+		txtcb(ctx, Tgenre, id3genres[in[127]]);
+
+	return 0;
+}
--- /dev/null
+++ b/src/id3v2.c
@@ -1,0 +1,450 @@
+/*
+ * Have fun reading the following:
+ *
+ * http://id3.org/id3v2.4.0-structure
+ * http://id3.org/id3v2.4.0-frames
+ * http://id3.org/d3v2.3.0
+ * http://id3.org/id3v2-00
+ * http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
+ * http://wiki.hydrogenaud.io/index.php?title=MP3#VBRI.2C_XING.2C_and_LAME_headers
+ * http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#VBRIHeader
+ */
+#include "tagspriv.h"
+
+#define synchsafe(d) (uint)(((d)[0]&127)<<21 | ((d)[1]&127)<<14 | ((d)[2]&127)<<7 | ((d)[3]&127)<<0) /* 4 bytes carrying 7 bits each, most significant first */
+#define beuint(d) ((uint)(d)[0]<<24 | (uint)(d)[1]<<16 | (uint)(d)[2]<<8 | (uint)(d)[3]<<0) /* 32-bit big-endian; cast first so the top byte never shifts into an int's sign bit */
+
+static int /* maps a text frame id k and its value v to a tag callback; returns 1 if the frame was used, 0 otherwise */
+v2cb(Tagctx *ctx, char *k, char *v)
+{
+	k++; /* skip the first character of the id so that the 3- and 4-letter forms compare alike */
+	if(strcmp(k, "AL") == 0 || strcmp(k, "ALB") == 0)
+		txtcb(ctx, Talbum, v);
+	else if(strcmp(k, "PE1") == 0 || strcmp(k, "PE2") == 0 || strcmp(k, "P1") == 0 || strcmp(k, "P2") == 0)
+		txtcb(ctx, Tartist, v);
+	else if(strcmp(k, "IT2") == 0 || strcmp(k, "T2") == 0)
+		txtcb(ctx, Ttitle, v);
+	else if(strcmp(k, "YE") == 0 || strcmp(k, "YER") == 0 || strcmp(k, "DRC") == 0)
+		txtcb(ctx, Tdate, v);
+	else if(strcmp(k, "RK") == 0 || strcmp(k, "RCK") == 0)
+		txtcb(ctx, Ttrack, v);
+	else if(strcmp(k, "CO") == 0 || strcmp(k, "CON") == 0){
+		for(; v[0]; v++){ /* genre: any mix of "(number)" references and free text */
+			if(v[0] == '(' && v[1] <= '9' && v[1] >= '0'){
+				int i = atoi(&v[1]);
+				if(i >= 0 && i < Numgenre) /* an overlong number may come back negative from atoi */
+					txtcb(ctx, Tgenre, id3genres[i]);
+				for(v++; v[0] && v[0] != ')'; v++);
+				v--;
+			}else if(v[0] != '(' && v[0] != ')'){
+				txtcb(ctx, Tgenre, v);
+				break;
+			}
+		}
+	}else if(strcmp(k, "XXX") == 0 && strncmp(v, "REPLAYGAIN_", 11) == 0){
+		int type = -1;
+		v += 11;
+		if(strncmp(v, "TRACK_", 6) == 0){
+			v += 6;
+			if(strcmp(v, "GAIN") == 0)
+				type = Ttrackgain;
+			else if(strcmp(v, "PEAK") == 0)
+				type = Ttrackpeak;
+		}else if(strncmp(v, "ALBUM_", 6) == 0){
+			v += 6;
+			if(strcmp(v, "GAIN") == 0)
+				type = Talbumgain;
+			else if(strcmp(v, "PEAK") == 0)
+				type = Talbumpeak;
+		}
+		if(type >= 0)
+			txtcb(ctx, type, v+5); /* the value is taken from right behind the "GAIN"/"PEAK" string and its NUL */
+		else
+			return 0;
+	}else
+		return 0;
+	return 1;
+}
+
+/* Parses an RVA2 frame body of sz bytes (identification string, then per-channel records) and
+ * reports the master-volume gain and peak as track or album replay gain; -1 if it is malformed. */
+static int
+rva2(Tagctx *ctx, char *tag, int sz)
+{
+	uchar *b, *end;
+
+	if((b = memchr(tag, 0, sz)) == nil)
+		return -1;
+	b++;
+	for(end = (uchar*)tag+sz; b+4 < end; b += 5){
+		int type = b[0];
+		float peak;
+		/* the adjustment is a signed 16-bit count of 1/512 dB: sign-extend it by hand */
+		float va = (float)(((b[1]<<8 | b[2]) ^ 0x8000) - 0x8000) / 512.0f;
+		if(b[3] == 24 && b+6 < end){ /* wider peaks need their extra bytes inside the frame */
+			peak = (float)(b[4]<<16 | b[5]<<8 | b[6]) / 32768.0f;
+			b += 2;
+		}else if(b[3] == 16 && b+5 < end){
+			peak = (float)(b[4]<<8 | b[5]) / 32768.0f;
+			b += 1;
+		}else if(b[3] == 8){
+			peak = (float)b[4] / 32768.0f;
+		}else
+			return -1;
+		if(type == 1){ /* master volume */
+			char vas[16], peaks[8];
+			snprint(vas, sizeof(vas), "%+.5f dB", va);
+			snprint(peaks, sizeof(peaks), "%.5f", peak);
+			vas[sizeof(vas)-1] = 0;
+			peaks[sizeof(peaks)-1] = 0;
+			if(strcmp((char*)tag, "track") == 0){
+				txtcb(ctx, Ttrackgain, vas);
+				txtcb(ctx, Ttrackpeak, peaks);
+			}else if(strcmp((char*)tag, "album") == 0){
+				txtcb(ctx, Talbumgain, vas);
+				txtcb(ctx, Talbumpeak, peaks);
+			}
+			break;
+		}
+	}
+	return 0;
+}
+
+static int /* removes unsynchronisation padding from b[0..sz) in place; returns the new length */
+resync(uchar *b, int sz)
+{
+	int i;
+	/* NOTE(review): the ID3v2 documents appear to reverse every "ff 00" pair, not only those before 111xxxxx -- confirm this narrower rule is intended */
+	if(sz < 4)
+		return sz;
+	for(i = 0; i < sz-2; i++){
+		if(b[i] == 0xff && b[i+1] == 0x00 && (b[i+2] & 0xe0) == 0xe0){ /* ff 00 followed by a byte with its top three bits set */
+			memmove(&b[i+1], &b[i+2], sz-i-2); /* close the gap left by the 0x00 */
+			sz--;
+		}
+	}
+	return sz;
+}
+
+static int /* Tagread filter that nontext() hands out with unsynchronised pictures; edits buf in place and updates *sz */
+unsyncread(void *buf, int *sz)
+{
+	int i;
+	uchar *b;
+
+	b = buf;
+	for(i = 0; i < *sz; i++){
+		if(b[i] == 0xff){
+			if(i+1 >= *sz || (b[i+1] == 0x00 && i+2 >= *sz)) /* the sequence runs past the end of buf: stop here */
+				break;
+			if(b[i+1] == 0x00 && (b[i+2] & 0xe0) == 0xe0){ /* same rule as resync(): drop the 0x00 */
+				memmove(&b[i+1], &b[i+2], *sz-i-2);
+				(*sz)--;
+			}
+		}
+	}
+	return i; /* index at which scanning stopped; equals the updated *sz unless the loop broke early */
+}
+
+/*
+ * Returns the offset just past the picture description that starts at tag[n], where tag holds
+ * the first 256 bytes of a picture frame and tag[0] is its text encoding. Encodings 0 and 3 end
+ * the description with one zero byte; the UTF-16 ones end it with a zero code unit, so those
+ * are scanned two bytes at a time. Without a terminator the scan stops near the end of tag.
+ */
+static int
+picdescend(char *tag, int n)
+{
+	int wide;
+
+	wide = tag[0] != 0 && tag[0] != 3;
+	for(; n < 253; n += 1+wide){
+		if(tag[n] == 0 && (!wide || tag[n+1] == 0))
+			return n+1+wide;
+	}
+	return n;
+}
+
+/* Handles the non-text frame whose id is in d: reports APIC/PIC pictures and RVA2 replay gain,
+ * then seeks to the end of the frame. Returns -1 only if that final seek fails. */
+static int
+nontext(Tagctx *ctx, uchar *d, int tsz, int unsync)
+{
+	int n, offset, pic;
+	char *b, *tag;
+	Tagread f;
+
+	tag = ctx->buf;
+	n = 0;
+	f = unsync ? unsyncread : nil;
+	pic = strcmp((char*)d, "PIC") == 0; /* this frame has a fixed 3-byte image format instead of a mime string */
+	if(pic || strcmp((char*)d, "APIC") == 0){
+		offset = ctx->seek(ctx, 0, 1);
+		if((n = ctx->read(ctx, tag, 256)) == 256){ /* mime and description should fit */
+			b = tag + 1; /* mime type or image format */
+			if(pic){ /* the format is followed by the picture type, not a NUL: compare 3 bytes */
+				n = picdescend(tag, 5);
+				tagscallcb(ctx, Timage, memcmp(b, "JPG", 3) == 0 ? "image/jpeg" : "image/png", offset+n, tsz-n, f);
+			}else if(memchr(b, 0, 255) != nil){ /* ignore a mime type with no terminator in the bytes read */
+				n = picdescend(tag, 1 + strlen(b) + 2);
+				tagscallcb(ctx, Timage, b, offset+n, tsz-n, f);
+			}
+			/* 256 bytes were consumed whatever the frame size; the seek below accounts for that */
+			n = 256;
+		}
+	}else if(strcmp((char*)d, "RVA2") == 0 && tsz >= 6+5){
+		/* replay gain. 6 = "track\0", 5 = other */
+		if(ctx->bufsz >= tsz && (n = ctx->read(ctx, tag, tsz)) == tsz)
+			rva2(ctx, tag, unsync ? resync((uchar*)tag, n) : n);
+	}
+
+	return ctx->seek(ctx, tsz-n, 1) < 0 ? -1 : 0;
+}
+
+static int
+text(Tagctx *ctx, uchar *d, int tsz, int unsync) /* reads a text frame of tsz bytes, converts it to UTF-8 and passes it to v2cb(); -1 only on a short read */
+{
+	char *b, *tag;
+
+	if(ctx->bufsz >= tsz+1){
+		/* place the data at the end to make best effort at charset conversion */
+		tag = &ctx->buf[ctx->bufsz - tsz - 1];
+		if(ctx->read(ctx, tag, tsz) != tsz)
+			return -1;
+	}else{ /* the frame does not fit into the buffer: skip it and report success */
+		ctx->seek(ctx, tsz, 1);
+		return 0;
+	}
+
+	if(unsync)
+		tsz = resync((uchar*)tag, tsz);
+
+	tag[tsz] = 0;
+	b = &tag[1]; /* tag[0] is the text encoding, the text follows */
+	/* NOTE(review): iso88591toutf8() stops at the first NUL, so for encoding 0 a second string in the frame (v2cb() reads one for TXXX) is presumably not converted -- confirm */
+	switch(tag[0]){
+	case 0: /* iso-8859-1 */
+		if(iso88591toutf8((uchar*)ctx->buf, ctx->bufsz, (uchar*)b, tsz) > 0)
+			v2cb(ctx, (char*)d, ctx->buf);
+		break;
+	case 1: /* utf-16 */
+	case 2:
+		if(utf16to8((uchar*)ctx->buf, ctx->bufsz, (uchar*)b, tsz) > 0)
+			v2cb(ctx, (char*)d, ctx->buf);
+		break;
+	case 3: /* utf-8 */
+		if(*b)
+			v2cb(ctx, (char*)d, b);
+		break;
+	}
+
+	return 0;
+}
+
+/* Tells whether d[0..9] looks like an ID3v2 header: "ID3" version[2] flags[1] size[4]. */
+static int
+isid3(uchar *d)
+{
+	if(d[0] != 'I' || d[1] != 'D' || d[2] != '3')
+		return 0;
+	if(d[3] == 0xff || d[4] == 0xff) /* neither version byte may be 0xff */
+		return 0;
+	return (d[6] | d[7] | d[8] | d[9]) < 0x80; /* every size byte must have its top bit clear */
+}
+
+static const uchar bitrates[4][4][16] = { /* indexed [version][layer][bitrate index] with the raw header fields; getduration() multiplies an entry by 2000 to get bits/s */
+	{
+		{0},
+		{0,  4,  8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64,  72,  80, 0}, /* v2.5 III */
+		{0,  4,  8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64,  72,  80, 0}, /* v2.5 II */
+		{0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 0}, /* v2.5 I */
+	},
+	{ {0}, {0}, {0}, {0} }, /* version index 1 carries no rates */
+	{
+		{0},
+		{0,  4,  8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64,  72,  80, 0}, /* v2 III */
+		{0,  4,  8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64,  72,  80, 0}, /* v2 II */
+		{0, 16, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 0}, /* v2 I */
+	},
+	{
+		{0},
+		{0, 16, 20, 24, 28, 32, 40,  48,  56,  64,  80,  96, 112, 128, 160, 0}, /* v1 III */
+		{0, 16, 24, 28, 32, 40, 48,  56,  64,  80,  96, 112, 128, 160, 192, 0}, /* v1 II */
+		{0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 0}, /* v1 I */
+	}
+};
+
+static const uint samplerates[4][4] = { /* Hz, indexed [version][sample rate index]; a zero entry makes getduration() skip the duration math */
+	{11025, 12000,  8000, 0},
+	{    0,     0,     0, 0},
+	{22050, 24000, 16000, 0},
+	{44100, 48000, 32000, 0},
+};
+
+static const int chans[] = {2, 2, 2, 1}; /* channel count, indexed by the 2-bit field at bits 7..6 of the frame header */
+
+static const int samplesframe[4][4] = { /* samples per frame, indexed [version][layer] like bitrates; version index 1 and layer index 0 carry no values */
+	{0,  576, 1152, 384}, /* v2.5 */
+	{0,    0,    0,   0},
+	{0,  576, 1152, 384}, /* v2 */
+	{0, 1152, 1152, 384}, /* v1 */
+};
+
+/* Reads 64 bytes at the current position as an MPEG audio frame and fills bitrate, samplerate, channels and duration; offset is where the audio starts, used for the file-size fallback. */
+static void
+getduration(Tagctx *ctx, int offset)
+{
+	uvlong n, framelen, samplespf;
+	uchar *b;
+	uint x;
+	int xversion, xlayer, xbitrate;
+
+	if(ctx->read(ctx, ctx->buf, 64) != 64)
+		return;
+	x = beuint((uchar*)ctx->buf);
+	xversion = x >> 19 & 3;
+	xlayer = x >> 17 & 3;
+	xbitrate = x >> 12 & 0xf;
+	ctx->bitrate = 2000*(int)bitrates[xversion][xlayer][xbitrate];
+	samplespf = samplesframe[xversion][xlayer];
+
+	ctx->samplerate = samplerates[xversion][x >> 10 & 3];
+	ctx->channels = chans[x >> 6 & 3];
+
+	if(ctx->samplerate > 0){
+		framelen = (uvlong)144*ctx->bitrate / ctx->samplerate;
+		if((x & (1<<9)) != 0) /* padding */
+			framelen += xlayer == 3 ? 4 : 1; /* for I it's 4 bytes */
+
+		if(memcmp(&ctx->buf[0x24], "Info", 4) == 0 || memcmp(&ctx->buf[0x24], "Xing", 4) == 0){
+			b = (uchar*)ctx->buf + 0x28;
+			x = beuint(b); b += 4;
+			if((x & 1) != 0){ /* number of frames is set */
+				n = beuint(b); b += 4;
+				ctx->duration = n * samplespf * 1000 / ctx->samplerate;
+			}
+
+			if(ctx->duration == 0 && (x & 2) != 0 && framelen > 0){ /* file size is set */
+				n = beuint(b);
+				ctx->duration = n * samplespf * 1000 / framelen / ctx->samplerate;
+			}
+		}else if(memcmp(&ctx->buf[0x24], "VBRI", 4) == 0){
+			n = beuint((uchar*)&ctx->buf[0x32]); /* number of frames, 14 bytes into the VBRI header */
+			ctx->duration = n * samplespf * 1000 / ctx->samplerate;
+
+			if(ctx->duration == 0 && framelen > 0){
+				n = beuint((uchar*)&ctx->buf[0x2e]); /* file size, 10 bytes into the VBRI header (after version, delay, quality) */
+				ctx->duration = n * samplespf * 1000 / framelen / ctx->samplerate;
+			}
+		}
+	}
+
+	if(ctx->bitrate > 0 && ctx->duration == 0) /* worst case -- use real file size instead */
+		ctx->duration = (ctx->seek(ctx, 0, 2) - offset)/(ctx->bitrate / 1000) * 8;
+}
+
+/* Parses the ID3v2 tag(s) at the start of the file, then looks for the first audio frame to get the stream parameters. Returns 0, or -1 when nothing usable is found or a read/seek fails. */
+int
+tagid3v2(Tagctx *ctx)
+{
+	int sz, exsz, framesz;
+	int ver, unsync, offset;
+	uchar d[10], *b;
+
+	if(ctx->read(ctx, d, sizeof(d)) != sizeof(d))
+		return -1;
+	if(!isid3(d)){ /* no tags, but the stream information is there */
+		if(d[0] != 0xff || (d[1] & 0xe0) != 0xe0)
+			return -1;
+		ctx->seek(ctx, -(int)sizeof(d), 1);
+		getduration(ctx, 0);
+		return 0;
+	}
+
+header:
+	ver = d[3];
+	unsync = d[5] & (1<<7);
+	sz = synchsafe(&d[6]);
+	if(ver == 2 && (d[5] & (1<<6)) != 0) /* compression */
+		return -1;
+
+	if(ver > 2){
+		if((d[5] & (1<<4)) != 0) /* footer */
+			sz -= 10;
+		if((d[5] & (1<<6)) != 0){ /* skip extended header */
+			if(ctx->read(ctx, d, 4) != 4)
+				return -1;
+			/* v2.3 stores a plain size that excludes these 4 bytes, v2.4 a synchsafe one that includes them */
+			exsz = (ver == 3) ? beuint(d) : synchsafe(d) - 4;
+			if(exsz < 0 || ctx->seek(ctx, exsz, 1) < 0)
+				return -1;
+			sz -= 4 + exsz;
+		}
+	}
+
+	framesz = (ver >= 3) ? 10 : 6;
+	for(; sz > framesz;){
+		int tsz, frameunsync;
+		if(ctx->read(ctx, d, framesz) != framesz)
+			return -1;
+		sz -= framesz;
+
+		/* return on padding */
+		if(memcmp(d, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", framesz) == 0)
+			break;
+		if(ver >= 3){
+			tsz = (ver == 3) ? beuint(&d[4]) : synchsafe(&d[4]);
+			if(tsz < 0 || tsz > sz)
+				break;
+			frameunsync = d[9] & (1<<1);
+			d[4] = 0; /* terminate the 4-letter frame id */
+			if((d[9] & 0x0c) != 0){ /* compression & encryption */
+				ctx->seek(ctx, tsz, 1);
+				sz -= tsz;
+				continue;
+			}
+			if(ver == 4 && (d[9] & 1<<0) != 0){ /* skip data length indicator */
+				if(tsz < 4 || ctx->seek(ctx, 4, 1) < 0) /* a shorter frame would leave tsz negative */
+					break;
+				sz -= 4;
+				tsz -= 4;
+			}
+		}else{
+			tsz = beuint(&d[3]) >> 8;
+			if(tsz > sz)
+				return -1;
+			frameunsync = 0;
+			d[3] = 0; /* terminate the 3-letter frame id */
+		}
+		sz -= tsz;
+
+		if(d[0] == 'T' && text(ctx, d, tsz, unsync || frameunsync) != 0)
+			return -1;
+		else if(d[0] != 'T' && nontext(ctx, d, tsz, unsync || frameunsync) != 0)
+			return -1;
+	}
+
+	offset = ctx->seek(ctx, sz, 1);
+	sz = ctx->bufsz <= 2048 ? ctx->bufsz : 2048;
+	b = nil;
+	for(exsz = 0; exsz < 2048; exsz += sz){ /* look at up to 2048 bytes for another tag or the first frame */
+		if(ctx->read(ctx, ctx->buf, sz) != sz)
+			break;
+		for(b = (uchar*)ctx->buf; (b = memchr(b, 'I', sz - 1 - ((char*)b - ctx->buf))) != nil; b++){
+			ctx->seek(ctx, (char*)b - ctx->buf + offset + exsz, 0);
+			if(ctx->read(ctx, d, sizeof(d)) != sizeof(d))
+				return 0;
+			if(isid3(d))
+				goto header;
+		}
+		if((b = memchr(ctx->buf, 0xff, sz-1)) != nil && (b[1] & 0xe0) == 0xe0){
+			offset = ctx->seek(ctx, (char*)b - ctx->buf + offset + exsz, 0);
+			break;
+		}
+	}
+
+	if(b != nil)
+		getduration(ctx, offset);
+
+	return 0;
+}
--- /dev/null
+++ b/src/m4a.c
@@ -1,0 +1,153 @@
+/* http://wiki.multimedia.cx/?title=QuickTime_container */
+/* https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html */
+#include "tagspriv.h"
+
+#define beuint(d) ((uint)(d)[0]<<24 | (uint)(d)[1]<<16 | (uint)(d)[2]<<8 | (uint)(d)[3]<<0) /* 32-bit big-endian; cast first so the top byte never shifts into an int's sign bit */
+#define beuint16(d) (ushort)((d)[0]<<8 | (d)[1]<<0) /* 16-bit big-endian */
+
+/* Walks the atoms of an M4A file and reports its tags, cover image and stream parameters. Returns 0 once the atoms run out, -1 on a malformed file or a failed read/seek. */
+int
+tagm4a(Tagctx *ctx)
+{
+	uvlong duration, scale;
+	uchar *d;
+	int sz, type, dtype, i, skip, n;
+	d = (uchar*)ctx->buf;
+	/* 4 bytes for atom size, 4 for type, 4 for data - expect "ftyp" to come first */
+	if(ctx->read(ctx, d, 4+4+4) != 4+4+4 || memcmp(d+4, "ftypM4A ", 8) != 0)
+		return -1;
+	sz = beuint(d) - 12; /* already have 12 bytes */
+	if(sz < 0)
+		return -1;
+
+	for(;;){
+		if(ctx->seek(ctx, sz, 1) < 0)
+			return -1;
+		if(ctx->read(ctx, d, 4) != 4) /* size */
+			break;
+		sz = beuint(d);
+		if(sz < 0) /* too large for an int; seeking backwards could loop forever */
+			return -1;
+		if(sz == 0)
+			continue;
+		if(ctx->read(ctx, d, 4) != 4) /* type */
+			return -1;
+		if(sz < 8)
+			continue;
+		d[4] = 0;
+		if(memcmp(d, "meta", 4) == 0){
+			sz = 4;
+			continue;
+		}else if( /* containers: step inside instead of skipping */
+			memcmp(d, "moov", 4) == 0 ||
+			memcmp(d, "udta", 4) == 0 ||
+			memcmp(d, "ilst", 4) == 0 ||
+			memcmp(d, "trak", 4) == 0 ||
+			memcmp(d, "mdia", 4) == 0 ||
+			memcmp(d, "minf", 4) == 0 ||
+			memcmp(d, "stbl", 4) == 0){
+			sz = 0;
+			continue;
+		}else if(memcmp(d, "stsd", 4) == 0){
+			sz -= 8;
+			if(ctx->read(ctx, d, 8) != 8)
+				return -1;
+			sz -= 8;
+			for(i = beuint(&d[4]); i > 0 && sz > 0; i--){
+				if(ctx->read(ctx, d, 8) != 8) /* size + format */
+					return -1;
+				sz -= 8;
+				skip = beuint(d) - 8;
+				if(memcmp(&d[4], "mp4a", 4) == 0){ /* audio */
+					n = 6+2 + 2+4+2 + 2+2 + 2+2 + 4; /* read a bunch at once */
+					/* reserved+id, ver+rev+vendor, channels+bps, ?+?, sample rate */
+					if(ctx->read(ctx, d, n) != n)
+						return -1;
+					skip -= n;
+					sz -= n;
+					ctx->channels = beuint16(&d[16]);
+					ctx->samplerate = beuint(&d[24])>>16;
+				}
+				if(ctx->seek(ctx, skip, 1) < 0)
+					return -1;
+				sz -= skip;
+			}
+			continue;
+		}
+
+		sz -= 8;
+		type = -1;
+		if(memcmp(d, "\251nam", 4) == 0)
+			type = Ttitle;
+		else if(memcmp(d, "\251alb", 4) == 0)
+			type = Talbum;
+		else if(memcmp(d, "\251ART", 4) == 0)
+			type = Tartist;
+		else if(memcmp(d, "\251gen", 4) == 0 || memcmp(d, "gnre", 4) == 0)
+			type = Tgenre;
+		else if(memcmp(d, "\251day", 4) == 0)
+			type = Tdate;
+		else if(memcmp(d, "covr", 4) == 0)
+			type = Timage;
+		else if(memcmp(d, "trkn", 4) == 0)
+			type = Ttrack;
+		else if(memcmp(d, "mdhd", 4) == 0){
+			if(ctx->read(ctx, d, 4) != 4) /* version byte, then 3 bytes of flags */
+				return -1;
+			sz -= 4;
+			duration = scale = 0;
+			if(d[0] == 0){ /* version 0 */
+				if(ctx->read(ctx, d, 16) != 16)
+					return -1;
+				sz -= 16;
+				scale = beuint(&d[8]);
+				duration = beuint(&d[12]);
+			}else if(d[0] == 1){ /* version 1 */
+				if(ctx->read(ctx, d, 28) != 28)
+					return -1;
+				sz -= 28;
+				scale = beuint(&d[16]);
+				duration = (uvlong)beuint(&d[20])<<32 | beuint(&d[24]);
+			}
+			if(scale > 0) /* unknown version or zero timescale: leave the duration alone */
+				ctx->duration = duration * 1000 / scale;
+			continue;
+		}
+		if(type < 0)
+			continue;
+		if(ctx->seek(ctx, 8, 1) < 0) /* skip size and "data" */
+			return -1;
+		sz -= 8;
+		if(ctx->read(ctx, d, 8) != 8) /* read data type and 4 bytes of whatever else */
+			return -1;
+		sz -= 8;
+		d[0] = 0;
+		dtype = beuint(d);
+		if(type == Ttrack){
+			if(ctx->read(ctx, d, 4) != 4)
+				return -1;
+			sz -= 4;
+			snprint((char*)d, ctx->bufsz, "%d", beuint(d));
+			txtcb(ctx, type, d);
+		}else if(type == Tgenre && dtype != 1){ /* numeric genre; a text genre falls through to the text case */
+			if(ctx->read(ctx, d, 2) != 2)
+				return -1;
+			sz -= 2;
+			if((i = d[1]-1) >= 0 && i < Numgenre)
+				txtcb(ctx, type, id3genres[i]);
+		}else if(dtype == 1){ /* text */
+			if(sz < 0 || sz >= ctx->bufsz) /* skip tags that can't fit into memory. ">=" because of '\0' */
+				continue;
+			if(ctx->read(ctx, d, sz) != sz)
+				return -1;
+			d[sz] = 0;
+			txtcb(ctx, type, d);
+			sz = 0;
+		}else if(type == Timage && dtype == 13) /* jpeg cover image */
+			tagscallcb(ctx, Timage, "image/jpeg", ctx->seek(ctx, 0, 1), sz, nil);
+		else if(type == Timage && dtype == 14) /* png cover image */
+			tagscallcb(ctx, Timage, "image/png", ctx->seek(ctx, 0, 1), sz, nil);
+	}
+
+	return 0;
+}
--- /dev/null
+++ b/src/mkfile
@@ -1,0 +1,22 @@
+</$objtype/mkfile
+LIB=/$objtype/lib/libtags.a
+
+OFILES=\
+	8859.$O\
+	flac.$O\
+	id3genres.$O\
+	id3v1.$O\
+	id3v2.$O\
+	m4a.$O\
+	tags.$O\
+	utf16.$O\
+	vorbis.$O\
+
+HFILES=\
+	/sys/include/tags.h\
+	tagspriv.h\
+
+/sys/include/%.h: %.h
+	cp $stem.h /sys/include/$stem.h
+
+</sys/src/cmd/mksyslib
--- /dev/null
+++ b/src/mkfile.nix
@@ -1,0 +1,21 @@
+LIB=libtags.a
+
+<$MKRULES/defaults
+
+OFILES=\
+	8859.$O\
+	flac.$O\
+	id3genres.$O\
+	id3v1.$O\
+	id3v2.$O\
+	m4a.$O\
+	tags.$O\
+	utf16.$O\
+	vorbis.$O\
+
+%.$O: tagspriv.h
+
+HFILES=\
+	tags.h\
+
+<$MKRULES/mklib
--- /dev/null
+++ b/src/tags.c
@@ -1,0 +1,61 @@
+#include "tagspriv.h"
+
+typedef struct Getter Getter;
+
+struct Getter /* one parser tagsget() may run */
+{
+	int (*f)(Tagctx *ctx); /* parser; tagsget() treats a return of 0 as success */
+	const char *ext; /* filename extension it applies to, dot included */
+	int extlen; /* length of ext, compared against the tail of the filename */
+	int format; /* F* constant stored in ctx->format when this parser found tags */
+};
+
+extern int tagvorbis(Tagctx *ctx);
+extern int tagflac(Tagctx *ctx);
+extern int tagid3v2(Tagctx *ctx);
+extern int tagid3v1(Tagctx *ctx);
+extern int tagm4a(Tagctx *ctx);
+
+static const Getter g[] = /* parsers in the order tagsget() tries them; both ID3 parsers run for ".mp3" */
+{
+	{tagid3v2, ".mp3", 4, Fmp3},
+	{tagid3v1, ".mp3", 4, Fmp3},
+	{tagvorbis, ".ogg", 4, Fogg},
+	{tagflac, ".flac", 5, Fflac},
+	{tagm4a, ".m4a", 4, Fm4a},
+};
+
+void
+tagscallcb(Tagctx *ctx, int type, const char *s, int offset, int size, Tagread f) /* single entry point through which parsers report a tag */
+{
+	ctx->found |= 1<<type; /* remember the type so that parsers can check ctx->found before reporting it again */
+	ctx->tag(ctx, type, s, offset, size, f); /* user callback */
+	ctx->num++; /* tagsget() uses the count to tell whether a parser reported anything */
+}
+
+/* Runs every parser whose extension matches ctx->filename (all of them when it is nil); returns 0 if at least one succeeded and reported a tag, -1 otherwise. */
+int
+tagsget(Tagctx *ctx)
+{
+	const Getter *p;
+	int len, res;
+
+	len = ctx->filename != nil ? strlen(ctx->filename) : 0;
+	if(ctx->filename != nil && len < 5) /* enough for having an extension */
+		return -1;
+	ctx->channels = ctx->samplerate = ctx->bitrate = ctx->duration = 0;
+	ctx->found = 0;
+	ctx->format = -1;
+	res = -1;
+	for(p = g; p < g + sizeof(g)/sizeof(g[0]); p++){
+		if(ctx->filename != nil && memcmp(&ctx->filename[len-p->extlen], p->ext, p->extlen) != 0)
+			continue;
+		ctx->num = 0;
+		if(p->f(ctx) == 0 && ctx->num > 0){
+			res = 0;
+			ctx->format = p->format;
+		}
+		ctx->seek(ctx, 0, 0); /* rewind for the next parser */
+	}
+	return res;
+}
--- /dev/null
+++ b/src/tags.h
@@ -1,0 +1,78 @@
+#ifndef __unix__
+#pragma lib "libtags.a"
+#endif
+
+typedef struct Tagctx Tagctx;
+typedef int (*Tagread)(void *buf, int *cnt);
+
+/*
+ * Tag type. The values are also used as bit positions in Tagctx.found,
+ * so there must be fewer of them than there are bits in an int.
+ */
+enum
+{
+	Tartist,
+	Talbum,
+	Ttitle,
+	Tdate, /* "2014", "2015/02/01", but the year goes first */
+	Ttrack, /* "1", "01", "1/4", but the track number goes first */
+	Talbumgain,
+	Talbumpeak,
+	Ttrackgain,
+	Ttrackpeak,
+	Tgenre,
+	Timage, /* cover image; described by offset and size rather than by a string */
+};
+
+/*
+ * Format of the audio file. tagsget() stores one of these in Tagctx.format,
+ * or -1 if no parser succeeded. Note that Fmp3 is 0.
+ */
+enum
+{
+	Fmp3,
+	Fogg,
+	Fflac,
+	Fm4a,
+};
+
+/* Tag parser context. You need to set it properly before parsing an audio file using libtags. */
+struct Tagctx
+{
+	/* Set it to the filename. Doesn't have to be a full path, but the extension must be there,
+	 * since it selects which parsers are run; names shorter than 5 characters are rejected.
+	 * May be NULL, in which case tagsget() tries every parser.
+	 */
+	const char *filename;
+
+	/* Read function. This is what libtags uses to read the file.
+	 * Expected to return the number of bytes read; the Vorbis parser treats any result
+	 * other than "cnt" as a failure.
+	 */
+	int (*read)(Tagctx *ctx, void *buf, int cnt);
+
+	/* Seek function. This is what libtags uses to seek through the file.
+	 * It is called with "whence" set to 0, 1 or 2 (presumably start, current position and end,
+	 * as with lseek - confirm). A negative result is treated as a failure.
+	 */
+	int (*seek)(Tagctx *ctx, int offset, int whence);
+
+	/* Callback that is used by libtags to inform about the tags of a file.
+	 * "type" is the tag's type (Tartist, ...). "s" is the null-terminated string unless "type" is
+	 * Timage. "offset" and "size" define the placement and size of the image cover ("type" = Timage)
+	 * inside the file, and "f" is not NULL in case reading the image cover requires additional
+	 * operations on the data, in which case you need to read the image cover as a stream and call this
+	 * function to apply these operations on the contents read.
+	 */
+	void (*tag)(Tagctx *ctx, int type, const char *s, int offset, int size, Tagread f);
+
+	/* Auxiliary data. Not used by libtags. */
+	void *aux;
+
+	/* Memory buffer to work in. */
+	char *buf;
+
+	/* Size of the buffer. Must be at least 256 bytes. */
+	int bufsz;
+
+	/* Here goes the stuff libtags sets. It should be accessed after tagsget() returns.
+	 * A value of 0 means it's undefined, except for "format", where 0 is Fmp3 and
+	 * -1 means that no parser succeeded.
+	 */
+	int channels; /* Number of channels. */
+	int samplerate; /* Hz */
+	int bitrate; /* Bitrate, bits/s. */
+	int duration; /* ms */
+	int format; /* Fmp3, Fogg, Fflac, Fm4a */
+
+	/* Private, don't touch. */
+	int found; /* bit (1<<type) is set for every tag type reported so far */
+	int num; /* number of tags reported by the parser currently running */
+};
+
+/* Parse the file using this function. Returns 0 on success. */
+extern int tagsget(Tagctx *ctx);
--- /dev/null
+++ b/src/tagspriv.h
@@ -1,0 +1,49 @@
+#ifdef __unix__
+#define _DEFAULT_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <strings.h>
+#define snprint snprintf
+#define cistrcmp strcasecmp
+#define nil NULL
+typedef unsigned char uchar;
+typedef unsigned long long uvlong;
+#else
+#include <u.h>
+#include <libc.h>
+#endif
+#include "tags.h"
+
+enum
+{
+	Numgenre = 192,
+};
+
+extern const char *id3genres[Numgenre];
+
+/*
+ * Converts (to UTF-8) at most sz bytes of src and writes it to out buffer.
+ * Returns the number of bytes converted.
+ * You need sz*2+1 bytes for out buffer to be completely safe.
+ */
+int iso88591toutf8(uchar *out, int osz, const uchar *src, int sz);
+
+/*
+ * Converts (to UTF-8) at most sz bytes of UTF-16 src and writes it to out buffer of osz bytes.
+ * On success the output is NUL-terminated.
+ * Returns the number of bytes of src consumed or < 0 in case of error.
+ * You need sz*4+1 bytes for out buffer to be completely safe.
+ * UTF-16 defaults to big endian if there is no BOM.
+ */
+int utf16to8(uchar *out, int osz, const uchar *src, int sz);
+
+/*
+ * This one is common for both vorbis.c and flac.c
+ * It maps a string k to tag type and executes the callback from ctx.
+ * Nothing is reported if k is not a known key or if v is empty.
+ */
+void cbvorbiscomment(Tagctx *ctx, char *k, char *v);
+
+void tagscallcb(Tagctx *ctx, int type, const char *s, int offset, int size, Tagread f);
+
+#define txtcb(ctx, type, s) tagscallcb(ctx, type, (const char*)s, 0, 0, nil)
--- /dev/null
+++ b/src/utf16.c
@@ -1,0 +1,59 @@
+/* Horror stories: http://en.wikipedia.org/wiki/UTF-16 */
+#include "tagspriv.h"
+
+#define rchr(s) (be ? ((s)[0]<<8 | (s)[1]) : ((s)[1]<<8 | (s)[0]))
+
+static const uchar mark[] = {0x00, 0x00, 0xc0, 0xe0, 0xf0};
+
+/*
+ * Converts UTF-16 text in s (sz bytes) to NUL-terminated UTF-8 in o (osz bytes).
+ * A leading byte order mark selects the endianness and is skipped; without
+ * one the input is read as big endian.
+ * Conversion stops when the input is used up or the output is full.
+ *
+ * Returns the number of bytes of s consumed, or -1 if the input contains a
+ * low surrogate on its own or a high surrogate not followed by a low one,
+ * or if o has no room even for the terminating NUL.
+ */
+int
+utf16to8(uchar *o, int osz, const uchar *s, int sz)
+{
+	int i, be, c, c2, wr, j;
+
+	/* the terminator written at the end needs one byte no matter what */
+	if(osz < 1)
+		return -1;
+
+	i = 0;
+	be = 1;
+	/* only look for a BOM if there are two bytes to look at */
+	if(sz >= 2){
+		if(s[0] == 0xfe && s[1] == 0xff)
+			i += 2;
+		else if(s[0] == 0xff && s[1] == 0xfe){
+			be = 0;
+			i += 2;
+		}
+	}
+
+	for(; i < sz-1 && osz > 1;){
+		c = rchr(&s[i]);
+		i += 2;
+		if(c >= 0xd800 && c <= 0xdbff && i < sz-1){
+			c2 = rchr(&s[i]);
+			if(c2 >= 0xdc00 && c2 <= 0xdfff){
+				/*
+				 * The pair carries a 20-bit offset from U+10000. It has to be
+				 * added: or-ing loses the carry whenever bit 16 of the offset
+				 * is set (U+20000 would come out as U+10000).
+				 */
+				c = 0x10000 + ((c - 0xd800)<<10 | (c2 - 0xdc00));
+				i += 2;
+			}else
+				return -1;
+		}else if(c >= 0xdc00 && c <= 0xdfff)
+			return -1;
+
+		/* number of UTF-8 bytes needed for c */
+		if(c < 0x80)
+			wr = 1;
+		else if(c < 0x800)
+			wr = 2;
+		else if(c < 0x10000)
+			wr = 3;
+		else
+			wr = 4;
+
+		/* keep one byte for the terminator */
+		osz -= wr;
+		if(osz < 1)
+			break;
+
+		/* fill the sequence from its last byte backwards, 6 bits at a time */
+		o += wr;
+		for(j = wr; j > 1; j--){
+			*(--o) = (c & 0x3f) | 0x80;
+			c >>= 6;
+		}
+		/* what is left of c goes into the lead byte together with the length marker */
+		*(--o) = c | mark[wr];
+		o += wr;
+	}
+
+	*o = 0;
+	return i;
+}
--- /dev/null
+++ b/src/vorbis.c
@@ -1,0 +1,123 @@
+/*
+ * https://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810005
+ * https://wiki.xiph.org/VorbisComment
+ */
+#include "tagspriv.h"
+
+/*
+ * Reads a 32-bit little-endian value from d.
+ * Each byte is widened to uint before it is shifted, so that a top byte
+ * of 0x80 or more is not shifted into the sign bit of an int.
+ */
+#define leuint(d) ((uint)((uchar*)(d))[3]<<24 | (uint)((uchar*)(d))[2]<<16 | (uint)((uchar*)(d))[1]<<8 | (uint)((uchar*)(d))[0]<<0)
+
+/*
+ * Looks the key k up (ignoring case) among the known comment names and, on
+ * the first match, reports the value v as the corresponding tag.
+ * Empty values and unknown keys are silently dropped.
+ */
+void
+cbvorbiscomment(Tagctx *ctx, char *k, char *v){
+	static const struct {
+		char *key;
+		int type;
+	} known[] = {
+		{"album", Talbum},
+		{"title", Ttitle},
+		{"artist", Tartist},
+		{"performer", Tartist},
+		{"tracknumber", Ttrack},
+		{"date", Tdate},
+		{"replaygain_track_peak", Ttrackpeak},
+		{"replaygain_track_gain", Ttrackgain},
+		{"replaygain_album_peak", Talbumpeak},
+		{"replaygain_album_gain", Talbumgain},
+		{"genre", Tgenre},
+	};
+	int n;
+
+	if(v[0] == 0)
+		return;
+
+	for(n = 0; n < (int)(sizeof(known)/sizeof(known[0])); n++){
+		if(cistrcmp(k, known[n].key) == 0){
+			txtcb(ctx, known[n].type, v);
+			return;
+		}
+	}
+}
+
+/*
+ * Parses an Ogg Vorbis stream read through ctx.
+ *
+ * Looks at up to two Ogg pages for the comment header, picking up channels,
+ * sample rate and bitrate from the identification header on the way. Each
+ * "key=value" comment that fits into ctx->buf is passed to cbvorbiscomment();
+ * longer ones are skipped. If the sample rate is known, the tail of the file
+ * is then searched for the last page to work out the duration.
+ *
+ * Returns 0 once the comments have been walked (the duration is optional),
+ * -1 on a short read, a failed seek or malformed data.
+ */
+int
+tagvorbis(Tagctx *ctx)
+{
+	char *v;
+	uchar *d, h[4];
+	int sz, numtags, i, npages;
+
+	d = (uchar*)ctx->buf;
+	/* need to find vorbis frame with type=3 */
+	for(npages = 0; npages < 2; npages++){ /* vorbis comment is the second header */
+		int nsegs;
+		if(ctx->read(ctx, d, 27) != 27)
+			return -1;
+		if(memcmp(d, "OggS", 4) != 0)
+			return -1;
+
+		/* calculate the size of the packet */
+		nsegs = d[26];
+		/* segment table plus one more byte, which is the packet type */
+		if(ctx->read(ctx, d, nsegs+1) != nsegs+1)
+			return -1;
+		for(sz = i = 0; i < nsegs; sz += d[i++]);
+
+		if(d[nsegs] == 3) /* comment */
+			break;
+		if(d[nsegs] == 1 && sz >= 28){ /* identification */
+			if(ctx->read(ctx, d, 28) != 28)
+				return -1;
+			sz -= 28;
+			ctx->channels = d[10];
+			ctx->samplerate = leuint(&d[11]);
+			if((ctx->bitrate = leuint(&d[15])) == 0) /* maximum */
+				ctx->bitrate = leuint(&d[19]); /* nominal */
+		}
+
+		/* skip the rest of the page; the packet type byte has been read already */
+		ctx->seek(ctx, sz-1, 1);
+	}
+
+	/* "vorbis" signature followed by the length of the vendor string */
+	if(ctx->read(ctx, &d[1], 10) != 10 || memcmp(&d[1], "vorbis", 6) != 0)
+		return -1;
+	/* a length that does not fit a positive int would make the seek go backwards */
+	if((sz = leuint(&d[7])) < 0)
+		return -1;
+	if(ctx->seek(ctx, sz, 1) < 0 || ctx->read(ctx, h, 4) != 4)
+		return -1;
+	numtags = leuint(h);
+
+	for(i = 0; i < numtags; i++){
+		if(ctx->read(ctx, h, 4) != 4)
+			return -1;
+		if((sz = leuint(h)) < 0)
+			return -1;
+
+		/* a comment that does not fit the buffer (with its terminator) is skipped */
+		if(ctx->bufsz < sz+1){
+			if(ctx->seek(ctx, sz, 1) < 0)
+				return -1;
+			continue;
+		}
+		if(ctx->read(ctx, ctx->buf, sz) != sz)
+			return -1;
+		ctx->buf[sz] = 0;
+
+		/* split "key=value" in place */
+		if((v = strchr(ctx->buf, '=')) == nil)
+			return -1;
+		*v++ = 0;
+		cbvorbiscomment(ctx, ctx->buf, v);
+	}
+
+	/* calculate the duration */
+	if(ctx->samplerate > 0){
+		sz = ctx->bufsz <= 4096 ? ctx->bufsz : 4096;
+		/*
+		 * Walk backwards from the end of the file in windows of sz bytes.
+		 * Consecutive windows overlap by 16 bytes so that a page header
+		 * lying across a window boundary is still seen in one piece.
+		 */
+		for(i = sz; i < 65536+16; i += sz - 16){
+			if(ctx->seek(ctx, -i, 2) <= 0)
+				break;
+			v = ctx->buf;
+			if(ctx->read(ctx, v, sz) != sz)
+				break;
+			for(; v != nil && v < ctx->buf+sz;){
+				/* the last 14 bytes are left out so that v[13] stays inside the window */
+				v = memchr(v, 'O', ctx->buf+sz - v - 14);
+				if(v != nil && v[1] == 'g' && v[2] == 'g' && v[3] == 'S' && (v[5] & 4) == 4){ /* last page */
+					/* 64-bit little-endian granule position, used here as a sample count */
+					uvlong g = leuint(v+6) | (uvlong)leuint(v+10)<<32;
+					ctx->duration = g * 1000 / ctx->samplerate;
+					return 0;
+				}
+				if(v != nil)
+					v++;
+			}
+		}
+	}
+
+	return 0;
+}