shithub: nime

ref: 74df3901c04f70c1ba89e71b78dee9dc2882e116
dir: /src/to-kana.c/

View raw version
/*
 *******************************************************************************
 * Author: Ethan Long
 * Licence: Public Domain
 * Email: ethandavidlong@gmail.com, u7281759@anu.edu.au
 * Description: to-kana is a program for converting streams of romaji to
 *              hiragana or katakana.
 */
#include <u.h>
#include <libc.h>
#include <stdio.h>

/* Forward declarations for the functions defined after main. */
void eval(int, int);			/* convert the stream on one fd to another fd */
void kanafill(Rune, char*, int, Rune*);	/* pick a kana inside one family */
void kanalook(char*, Rune*, int);	/* convert one romaji syllable to kana */
void printhelp(void);			/* print the option summary */
int strappend(char*, char);		/* append a char, report a complete syllable */


/*
 * Program entry point.  Walks the command line flags:
 *   -s  run eval from standard input to standard output
 *   -h  print the option summary and exit
 * Any other flag prints a usage line on file descriptor 2, prints the
 * option summary and exits with the status "usage".
 */
void
main(int argc, char *argv[])
{
	ARGBEGIN{
	case 'h':
		printhelp();
		exits(0);
	case 's':
		/* convert stdin to stdout, then keep parsing remaining flags */
		eval(fileno(stdin), fileno(stdout));
		break;
	default:
		fprint(2, "usage: %s [-s][-h]\n", argv0);
		printhelp();
		exits("usage");
	}ARGEND;

	exits(nil);
}

/*
 * Prints the list of supported command line options, one print call per
 * line of text.
 */
void
printhelp(void)
{
	static char *helptext[] = {
		"options:\n",
		" [-s] - use stdin/out streams\n",
		" [-h] - show this help\n",
		0,
	};
	char **line;

	for(line = helptext; *line != 0; line++)
		print("%s", *line);
}

/*
 * Reads romaji one byte at a time from the file descriptor fpin until end
 * of file or a read error, and prints the converted kana to the file
 * descriptor fpout.
 *
 * A '!' byte toggles between hiragana and katakana output and is not
 * itself stored.  Every other byte is appended to a small buffer with
 * strappend; as soon as strappend reports a complete syllable the buffer
 * is converted with kanalook, printed with %S and cleared.
 *
 * If the buffer fills up without ever forming a syllable, its contents
 * are printed unchanged and the buffer is cleared, so arbitrarily long
 * runs of non-romaji input cannot overrun it.  Whatever is still pending
 * when the input ends is also printed unchanged.
 */
void
eval(int fpin, int fpout)
{
	int katakana;
	char charin;
	char buf[10];
	Rune kana[10];

	katakana = 0;
	/* strappend and %S both rely on zero-terminated buffers */
	memset(buf, 0, sizeof buf);
	memset(kana, 0, sizeof kana);

	/* read gives 0 at end of file and -1 on error; stop on both */
	while(read(fpin, &charin, 1) > 0){
		if(charin == '!'){
			katakana = !katakana;
			continue;
		}

		/* keep one byte free for the terminator */
		if(strlen(buf) + 1 >= sizeof buf){
			fprint(fpout, "%s", buf);
			memset(buf, 0, sizeof buf);
		}

		if(strappend(buf, charin)){
			kanalook(buf, kana, katakana);
			fprint(fpout, "%S", kana);
			memset(kana, 0, sizeof kana);
			memset(buf, 0, sizeof buf);
		}
	}

	/* flush an incomplete trailing syllable as plain text */
	fprint(fpout, "%s", buf);
}

/*
 * Appends the character in to the NUL-terminated romaji string and
 * reports whether the string now spells a complete syllable.
 *
 * A syllable is complete when in is a vowel (a, i, u, e, o), or when in
 * is 'n' and the character already at the end of the string is also 'n'.
 *
 * The character is stored over the old terminator and no new terminator
 * is written: the caller must pass a buffer whose unused bytes are already
 * zero and which has room for one more character.
 *
 * Returns 1 when a complete syllable can be formed, 0 otherwise.
 */
int
strappend(char* string, char in)
{
	int len;
	int end;

	for(len = 0; string[len] != 0; len++)
		;

	switch(in){
	case 'a':
	case 'i':
	case 'u':
	case 'e':
	case 'o':
		end = 1;
		break;
	case 'n':
		/* only look back when there is a previous character to look at */
		end = (len > 0 && string[len - 1] == 'n');
		break;
	default:
		end = 0;
		break;
	}

	string[len] = in;
	return end;
}

/*
 * Builds the syllable tail that kanafill understands for a palatalised
 * sound: the vowel 'i' gives "i", any other vowel gives "y" followed by
 * that vowel.  tail must have room for 3 bytes.
 */
static void
ytail(char vowel, char *tail)
{
	if(vowel == 'i'){
		tail[0] = 'i';
		tail[1] = 0;
		return;
	}
	tail[0] = 'y';
	tail[1] = vowel;
	tail[2] = 0;
}

/*
 * kanalook converts the single romaji syllable in the NUL-terminated
 * string buf into kana runes stored in str.
 *
 * buf     - the romaji syllable; it is only read, never modified
 * str     - output rune array; only the runes that make up the kana are
 *           written, so the caller must zero it beforehand if it wants a
 *           terminated string
 * katakana- zero for hiragana, non-zero for katakana
 *
 * A doubled first letter produces a small tsu in front of the syllable.
 * The spellings sh+vowel, ch+vowel and j+vowel are rewritten to the
 * "y"-glide form before being handed to kanafill; "fu" gives the hu kana
 * and fa/fi/fe/fo give it followed by a small vowel.  "nn" gives the
 * syllabic n.  Unknown first letters write nothing beyond the optional
 * small tsu.
 */
void
kanalook(char* buf, Rune* str, int katakana)
{
	int index;
	int shift;
	char *end;
	char tail[3];
	Rune small;

	/* katakana sit 96 code points above the matching hiragana */
	shift = katakana ? 96 : 0;

	index = 0;
	/*
	 * doubled consonant: emit the small tsu first.  For a doubled vowel
	 * or "nn" the cases below write str[0] and so overwrite it again.
	 */
	if(buf[0] != 0 && buf[0] == buf[1]){
		str[index] = (Rune)((int)(L'っ') + shift);
		index++;
	}
	/* end is the part of the syllable after the leading consonant(s) */
	end = buf + 1 + index;

	switch(buf[0]){
	/* あ family */
	case 'a':
		str[0] = (Rune)((int)(L'あ') + shift);
		break;
	case 'i':
		str[0] = (Rune)((int)(L'い') + shift);
		break;
	case 'u':
		str[0] = (Rune)((int)(L'う') + shift);
		break;
	case 'e':
		str[0] = (Rune)((int)(L'え') + shift);
		break;
	case 'o':
		str[0] = (Rune)((int)(L'お') + shift);
		break;

	/* か family */
	case 'k':
		kanafill((Rune)((int)(L'か') + shift), end, 0, str + index);
		break;
	case 'g':
		kanafill((Rune)((int)(L'か') + shift), end, 1, str + index);
		break;

	/* さ family */
	case 's':
		/* shi -> si, sha/shu/sho -> sya/syu/syo */
		if(end[0] == 'h'){
			ytail(end[1], tail);
			end = tail;
		}
		kanafill((Rune)((int)(L'さ') + shift), end, 0, str + index);
		break;
	case 'z':
		kanafill((Rune)((int)(L'さ') + shift), end, 1, str + index);
		break;
	case 'j':
		/* ji -> zi, ja/ju/jo -> zya/zyu/zyo; jya etc. pass through */
		if(end[0] != 'y'){
			ytail(end[0], tail);
			end = tail;
		}
		kanafill((Rune)((int)(L'さ') + shift), end, 1, str + index);
		break;

	/* た family */
	case 't':
		kanafill((Rune)((int)(L'た') + shift), end, 0, str + index);
		break;
	case 'c':
		/* chi -> ti, cha/chu/cho -> tya/tyu/tyo; cya etc. pass through */
		if(end[0] == 'h'){
			ytail(end[1], tail);
			end = tail;
		}else if(end[0] != 'y'){
			ytail(end[0], tail);
			end = tail;
		}
		kanafill((Rune)((int)(L'た') + shift), end, 0, str + index);
		break;
	case 'd':
		kanafill((Rune)((int)(L'た') + shift), end, 1, str + index);
		break;

	/* な family (and ん) */
	case 'n':
		if(buf[1] == 'n')
			str[0] = (Rune)((int)(L'ん') + shift);
		else
			kanafill((Rune)((int)(L'な') + shift), end, 0, str + index);
		break;

	/* は family */
	case 'h':
		kanafill((Rune)((int)(L'は') + shift), end, 0, str + index);
		break;
	case 'f':
		/* fu is the hu kana; other vowels add a small vowel after it */
		small = 0;
		switch(end[0]){
		case 'a':
			small = L'ぁ';
			break;
		case 'i':
			small = L'ぃ';
			break;
		case 'u':
			break;
		case 'e':
			small = L'ぇ';
			break;
		case 'o':
			small = L'ぉ';
			break;
		default:
			return;
		}
		kanafill((Rune)((int)(L'は') + shift), "u", 0, str + index);
		if(small != 0)
			str[index + 1] = (Rune)((int)small + shift);
		break;
	case 'b':
		kanafill((Rune)((int)(L'は') + shift), end, 1, str + index);
		break;
	case 'p':
		kanafill((Rune)((int)(L'は') + shift), end, 2, str + index);
		break;

	/* ま family */
	case 'm':
		kanafill((Rune)((int)(L'ま') + shift), end, 0, str + index);
		break;

	/* や family */
	case 'y':
		kanafill((Rune)((int)(L'や') + shift), end, 0, str + index);
		break;

	/* ら family */
	case 'r':
		kanafill((Rune)((int)(L'ら') + shift), end, 0, str + index);
		break;

	/* わ family */
	case 'w':
		kanafill((Rune)((int)(L'わ') + shift), end, 0, str + index);
		break;

	default:
		break;
	}
}

/*
 * Maps a romaji vowel to its position in the a-i-u-e-o order,
 * or -1 when c is not one of those five letters.
 */
static int
vowelindex(char c)
{
	switch(c){
	case 'a':
		return 0;
	case 'i':
		return 1;
	case 'u':
		return 2;
	case 'e':
		return 3;
	case 'o':
		return 4;
	default:
		return -1;
	}
}

/*
 * Returns the small ya, yu or yo that follows an i-row kana for the vowel
 * c, in the same script (hiragana or katakana) as base, or 0 when c is
 * not a, u or o.
 */
static Rune
smallglide(Rune base, char c)
{
	Rune small;

	switch(c){
	case 'a':
		small = L'ゃ';
		break;
	case 'u':
		small = L'ゅ';
		break;
	case 'o':
		small = L'ょ';
		break;
	default:
		return 0;
	}
	/* a katakana base needs the katakana small form, 96 code points up */
	if(base >= L'ァ')
		small = (Rune)((int)small + 96);
	return small;
}

/*
 * The kanafill function selects one kana inside a family.
 *
 * base    - the "a" kana of the family, hiragana or katakana
 * in      - the rest of the syllable after the consonant: a single vowel,
 *           or 'y' followed by a vowel for a glide such as "kya"
 * dakuten - 0 for the plain kana, 1 for the dakuten form, 2 for the
 *           handakuten form; it is ignored for the な, ま, や, ら and わ
 *           families
 * out     - rune array to write into: out[0] receives the kana, and for a
 *           glide out[1] receives the small ya/yu/yo in the same script
 *           as base
 *
 * Nothing is written when base is not a known family or in does not name
 * a kana that exists in that family.
 */
void
kanafill(Rune base, char* in, int dakuten, Rune* out)
{
	/*
	 * distance from the family's base kana to its a, i, u, e and o
	 * members; -1 marks a vowel the family does not have
	 */
	static int step1[] = {0, 1, 2, 3, 4};
	static int step2[] = {0, 2, 4, 6, 8};
	static int step3[] = {0, 3, 6, 9, 12};
	static int tarow[] = {0, 2, 5, 7, 9};	/* small tsu sits before tsu */
	static int yarow[] = {0, -1, 2, -1, 4};
	static int warow[] = {0, 1, -1, 2, 3};
	int *off;
	int glide;
	int v;
	Rune small;

	glide = 1;
	switch(base){
	/* Special Boys */
	case L'や':
	case L'ヤ':
		off = yarow;
		glide = 0;
		dakuten = 0;
		break;

	case L'わ':
	case L'ワ':
		off = warow;
		glide = 0;
		dakuten = 0;
		break;

	case L'ん':
	case L'ン':
		/* keep the script of the base that was passed in */
		out[0] = base;
		return;

	/* は family has marudakuten */
	case L'は':
	case L'ハ':
		off = step3;
		break;

	/* families with no dakuten */
	case L'ら':
	case L'ラ':
	case L'ま':
	case L'マ':
	case L'な':
	case L'ナ':
		off = step1;
		dakuten = 0;
		break;

	/* families with just dakuten */
	case L'か':
	case L'カ':
	case L'さ':
	case L'サ':
		off = step2;
		break;

	/* た family has small つ and dakuten */
	case L'た':
	case L'タ':
		off = tarow;
		break;

	default:
		return;
	}

	if(in[0] == 'y'){
		if(!glide)
			return;
		/* a glide is the i-row kana followed by a small ya/yu/yo */
		out[0] = (Rune)((int)base + off[1] + dakuten);
		small = smallglide(base, in[1]);
		if(small != 0)
			out[1] = small;
		return;
	}

	v = vowelindex(in[0]);
	if(v < 0 || off[v] < 0)
		return;
	out[0] = (Rune)((int)base + off[v] + dakuten);
}