/*
 * Sophie
 *
 * distrib > Mageia > 5 > i586 > media > core-release > by-pkgid > fff2921ff40d1f832561027dc17323f5 > files > 29
 *
 * epic5-1.1.9-3.mga5.i586.rpm
 */

/*
 * Here's the plan:
 * You type a UTF-8 string on standard input, and for each line the program
 * prints every Unicode code point it contains (in hex), one per output line.
 */
#include <stdio.h>

/*
 * Decode the single UTF-8 sequence that starts at seq[0].
 *
 * The original 1- to 6-byte UTF-8 forms are accepted (lead bytes 0x00-0x7F,
 * 0xC0-0xFD).  Overlong encodings and surrogate values are NOT rejected;
 * the bits are simply assembled as found.
 *
 * seq        - points into a NUL-terminated buffer.
 * code_point - receives the decoded value on success.
 * bad_offset - on failure, receives the index (relative to seq) of the
 *              offending byte: 0 for an invalid lead byte, otherwise the
 *              index of the first byte that is not a 10xxxxxx continuation.
 *
 * Returns the number of bytes consumed (1..6), or 0 on failure.
 *
 * Every continuation byte is checked before the next one is read, and
 * neither NUL nor '\n' is a valid continuation byte, so a truncated
 * sequence can never make the decoder read past the end of the line.
 */
static unsigned int decode_utf8 (const unsigned char *seq,
				 unsigned long *code_point,
				 unsigned int *bad_offset)
{
	unsigned long	value;
	unsigned int	length;
	unsigned int	i;

	/* The lead byte tells us the sequence length and holds the top bits. */
	if ((seq[0] & 0x80) == 0x00) {
		length = 1;
		value = seq[0];
	} else if ((seq[0] & 0xE0) == 0xC0) {
		length = 2;
		value = seq[0] & 0x1F;
	} else if ((seq[0] & 0xF0) == 0xE0) {
		length = 3;
		value = seq[0] & 0x0F;
	} else if ((seq[0] & 0xF8) == 0xF0) {
		length = 4;
		value = seq[0] & 0x07;
	} else if ((seq[0] & 0xFC) == 0xF8) {
		length = 5;
		value = seq[0] & 0x03;
	} else if ((seq[0] & 0xFE) == 0xFC) {
		length = 6;
		value = seq[0] & 0x01;
	} else {
		/* Stray continuation byte (0x80-0xBF), or 0xFE / 0xFF. */
		*bad_offset = 0;
		return 0;
	}

	/* Each continuation byte contributes its low six bits. */
	for (i = 1; i < length; i++) {
		if ((seq[i] & 0xC0) != 0x80) {
			*bad_offset = i;
			return 0;
		}
		value = (value << 6) | (unsigned long)(seq[i] & 0x3F);
	}

	*code_point = value;
	return length;
}

/*
 * Read lines from stdin until EOF (or a read error) and print each code
 * point of each line in hex, one per output line.  The line's trailing
 * newline is not printed.
 *
 * On the first malformed byte the program prints a "Huh?" diagnostic for
 * that byte and stops.  The exit status is 0 in every case.
 *
 * Lines longer than the buffer are processed in buffer-sized pieces, so a
 * multi-byte sequence that happens to straddle a piece boundary is
 * reported as malformed.
 */
int main (void)
{
	char	line[256];

	for (;;) {
		const unsigned char *	cursor;

		/* NULL means EOF with nothing read, or a read error. */
		if (fgets(line, sizeof line, stdin) == NULL) {
			return 0;
		}

		cursor = (const unsigned char *)line;

		/*
		 * Stop at the newline, or at the terminator when the piece
		 * has no newline (over-long line, or last line of input).
		 */
		while (*cursor != '\n' && *cursor != '\0') {
			unsigned long	code_point;
			unsigned int	bad_offset;
			unsigned int	consumed;

			consumed = decode_utf8(cursor, &code_point, &bad_offset);
			if (consumed == 0) {
				printf("Huh? %#x\n",
					(unsigned int)cursor[bad_offset]);
				printf("%d\n", cursor[bad_offset] & 0x80);
				return 0;
			}

			/* code_point is unsigned long, so it needs %lx. */
			printf("%#lx\n", code_point);
			cursor += consumed;
		}
	}
}