Jo_MPEG converted to C

jo_mpeg is a C++ single header library written by Jon Olick, which creates MPEG-1 videos (without audio). It is listed as a C++ only library in stb’s single header library collection. However, the only thing that makes this library C++ only is its use of `&` references. Replacing those with plain pointers makes it compile as both C and C++. I’m also a big fan of stb-style single header libraries, so I converted the library to C.

Full source of the C compatible jo_mpeg.h

/* public domain Simple, Minimalistic, No Allocations MPEG writer - http://jonolick.com
 *
 * Converted to C by Wladislav Artsimovich https://blog.frost.kiwi/jo-mpeg-in-c
 *
 * Latest revisions:
 * 	1.04 (21-01-2025) YUV math fix, this time for real ( thx @r-lyeh, again )
 *                    as per https://github.com/FrostKiwi/treasurechest/issues/5
 * 	1.03 (15-08-2024) Reverted color space change from 1.02, as it resulted in
 *                    overscaled color vectors and thus oversaturated colors
 * 	1.02 (22-03-2017) Fixed AC encoding bug. 
 *                    Fixed color space bug (thx r-lyeh!)
 * 	1.01 (18-10-2016) warning fixes
 * 	1.00 (25-09-2016) initial release
 *
 * Basic usage:
 *	unsigned char *frame = malloc(width*height*4); // 4 component. RGBX format, where X is unused
 *	FILE *fp = fopen("foo.mpg", "wb");
 *	jo_write_mpeg(fp, frame, width, height, 60);  // frame 0
 *	jo_write_mpeg(fp, frame, width, height, 60);  // frame 1
 *	jo_write_mpeg(fp, frame, width, height, 60);  // frame 2
 *	...
 *	fclose(fp);
 *
 * Notes:
 * 	Only supports 24, 25, 30, 50, or 60 fps
 *
 * 	I don't know if decoders support changing of fps, or dimensions for each frame. 
 * 	Movie players *should* support it as the spec allows it, but ...
 *
 * 	MPEG-1/2 currently has no active patents as far as I am aware.
 *  
 *	http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html
 *	http://www.cs.cornell.edu/dali/api/mpegvideo-c.html
 * */

#ifndef JO_INCLUDE_MPEG_H
#define JO_INCLUDE_MPEG_H

#include <stdio.h>

// To get a header file for this, either cut and paste the header,
// or create jo_mpeg.h, #define JO_MPEG_HEADER_FILE_ONLY, and
// then include jo_mpeg.c from it.

// Encodes one frame and writes it to fp. rgbx holds width*height pixels of
// 4 bytes each; the first three bytes of a pixel are read as R, G, B.
// The function is declared void: it returns no status, so failures are not
// reported to the caller.
extern void jo_write_mpeg(FILE *fp, const unsigned char *rgbx, int width, int height, int fps);

#endif // JO_INCLUDE_MPEG_H

#ifndef JO_MPEG_HEADER_FILE_ONLY

#include <stdio.h>
#include <math.h>
#include <memory.h>

// Huffman tables
// DC size codes for luma (s_jo_HTDC_Y) and chroma (s_jo_HTDC_C) blocks.
// jo_processDU indexes them by the number of bits needed to hold the absolute
// DC difference (0..8) and hands each entry to jo_writeBits as
// {code value, code length in bits}.
static const unsigned char s_jo_HTDC_Y[9][2] = { {4,3}, {0,2}, {1,2}, {5,3}, {6,3}, {14,4}, {30,5}, {62,6}, {126,7} };
static const unsigned char s_jo_HTDC_C[9][2] = { {0,2}, {1,2}, {2,2}, {6,3}, {14,4}, {30,5}, {62,6}, {126,7}, {254,8} };
// AC run/level variable-length codes, indexed [run][|level| - 1], where run is
// the number of zero coefficients skipped before the value. Each entry is
// {code value, code length in bits}. The stored code is the one for a positive
// level; jo_processDU adds 1 to it for a negative level. Entries that are not
// listed stay zero-initialised (length 0), which jo_processDU treats as "no
// short code" and encodes with an escape sequence instead.
//
// Every listed {code, length} pair must be unique, otherwise two different
// run/level combinations produce the same bits. For run 0 the 15-bit codes of
// levels 16..31 step down by 2 per level: 62, 60, 58, ... 32.
static const unsigned char s_jo_HTAC[32][40][2] = {
{ {6,3},{8,5},{10,6},{12,8},{76,9},{66,9},{20,11},{58,13},{48,13},{38,13},{32,13},{52,14},{50,14},{48,14},{46,14},{62,15},{60,15},{58,15},{56,15},{54,15},{52,15},{50,15},{48,15},{46,15},{44,15},{42,15},{40,15},{38,15},{36,15},{34,15},{32,15},{48,16},{46,16},{44,16},{42,16},{40,16},{38,16},{36,16},{34,16},{32,16},},
{ {6,4},{12,7},{74,9},{24,11},{54,13},{44,14},{42,14},{62,16},{60,16},{58,16},{56,16},{54,16},{52,16},{50,16},{38,17},{36,17},{34,17},{32,17} },
{ {10,5},{8,8},{22,11},{40,13},{40,14} },
{ {14,6},{72,9},{56,13},{38,14} },
{ {12,6},{30,11},{36,13} },  { {14,7},{18,11},{36,14} },  { {10,7},{60,13},{40,17} },
{ {8,7},{42,13} },  { {14,8},{34,13} },  { {10,8},{34,14} },  { {78,9},{32,14} },  { {70,9},{52,17} },  { {68,9},{50,17} },  { {64,9},{48,17} },  { {28,11},{46,17} },  { {26,11},{44,17} },  { {16,11},{42,17} },
{ {62,13} }, { {52,13} }, { {50,13} }, { {46,13} }, { {44,13} }, { {62,14} }, { {60,14} }, { {58,14} }, { {56,14} }, { {54,14} }, { {62,17} }, { {60,17} }, { {58,17} }, { {56,17} }, { {54,17} },
};
// Per-coefficient multipliers. jo_processDU multiplies the output of its two
// jo_DCT passes by these values, element by element in the block's own
// (row-major, non-zigzag) order, and then rounds to the nearest integer.
// NOTE(review): presumably each value folds the DCT output scaling together
// with the quantiser step for that coefficient - not verifiable from this file.
static const float s_jo_quantTbl[64] = {
	0.015625f,0.005632f,0.005035f,0.004832f,0.004808f,0.005892f,0.007964f,0.013325f,
	0.005632f,0.004061f,0.003135f,0.003193f,0.003338f,0.003955f,0.004898f,0.008828f,
	0.005035f,0.003135f,0.002816f,0.003013f,0.003299f,0.003581f,0.005199f,0.009125f,
	0.004832f,0.003484f,0.003129f,0.003348f,0.003666f,0.003979f,0.005309f,0.009632f,
	0.005682f,0.003466f,0.003543f,0.003666f,0.003906f,0.004546f,0.005774f,0.009439f,
	0.006119f,0.004248f,0.004199f,0.004228f,0.004546f,0.005062f,0.006124f,0.009942f,
	0.008883f,0.006167f,0.006096f,0.005777f,0.006078f,0.006391f,0.007621f,0.012133f,
	0.016780f,0.011263f,0.009907f,0.010139f,0.009849f,0.010297f,0.012133f,0.019785f,
};
// Maps a coefficient's index in the 8x8 block (row-major) to its position in
// the zigzag scan: jo_processDU stores coefficient i at Q[s_jo_ZigZag[i]], so
// Q[] ends up in the order in which the coefficients are encoded.
static const unsigned char s_jo_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18,24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 };

// State of the bit-level writer used by jo_writeBits.
typedef struct {
	// Stream that completed bytes are written to with putc.
	FILE *fp;
	// buf: bits not yet written out; the next byte to emit sits in bits 23..16.
	// cnt: how many bits in buf are still pending.
	int buf, cnt;
} jo_bits_t;

// Appends `count` bits taken from the low end of `value` to the bit stream,
// most significant bit first, and writes every completed byte to b->fp.
//
// b->buf holds the pending bits left-aligned in a 24-bit window (bits 23..0)
// and b->cnt says how many of them are pending; the loop below leaves fewer
// than 8 pending bits on return. The longest code in this file's tables is
// 17 bits, so cnt + count stays within the 24-bit window.
//
// The shifting is done on an unsigned copy that is masked back to 24 bits.
// Left-shifting a signed int into or past the sign bit is undefined behaviour
// in C, and bits above bit 23 never reach the output, so dropping them does
// not change the bytes written.
// putc failures are not checked; this function has no way to report them.
static void jo_writeBits(jo_bits_t *b, int value, int count) {
	unsigned window = (unsigned)b->buf;

	b->cnt += count;
	window = (window | ((unsigned)value << (24 - b->cnt))) & 0xFFFFFFu;
	while (b->cnt >= 8) {
		unsigned char c = (unsigned char)((window >> 16) & 255u);
		putc(c, b->fp);
		window = (window << 8) & 0xFFFFFFu;
		b->cnt -= 8;
	}
	b->buf = (int)window;
}

// One 8-point forward DCT pass, computed in place on the eight floats that
// d0..d7 point to. jo_processDU runs it once per row and once per column of
// an 8x8 block. The outputs are left unnormalised (d0 receives the plain sum
// of the eight inputs); jo_processDU scales them afterwards.
static void jo_DCT(float *d0, float *d1, float *d2, float *d3, float *d4, float *d5, float *d6, float *d7) {
	// Read all eight inputs before any output slot is overwritten.
	const float x0 = *d0, x1 = *d1, x2 = *d2, x3 = *d3;
	const float x4 = *d4, x5 = *d5, x6 = *d6, x7 = *d7;

	// Sums and differences of the mirrored pairs (k, 7-k).
	const float sum07 = x0 + x7;
	const float dif07 = x0 - x7;
	const float sum16 = x1 + x6;
	const float dif16 = x1 - x6;
	const float sum25 = x2 + x5;
	const float dif25 = x2 - x5;
	const float sum34 = x3 + x4;
	const float dif34 = x3 - x4;

	// Even part: built from the pair sums only.
	const float evenA = sum07 + sum34;	// phase 2
	const float evenD = sum07 - sum34;
	const float evenB = sum16 + sum25;
	const float evenC = sum16 - sum25;
	const float evenRot = (evenC + evenD) * 0.707106781f; // c4

	*d0 = evenA + evenB;		// phase 3
	*d4 = evenA - evenB;
	*d2 = evenD + evenRot;		// phase 5
	*d6 = evenD - evenRot;

	// Odd part: built from the pair differences only.
	const float oddA = dif34 + dif25;	// phase 2
	const float oddB = dif25 + dif16;
	const float oddC = dif16 + dif07;

	// The rotator is modified from fig 4-8 to avoid extra negations.
	const float rot5 = (oddA - oddC) * 0.382683433f; // c6
	const float rot2 = oddA * 0.541196100f + rot5; // c2-c6
	const float rot4 = oddC * 1.306562965f + rot5; // c2+c6
	const float rot3 = oddB * 0.707106781f; // c4

	const float hi = dif07 + rot3;	// phase 5
	const float lo = dif07 - rot3;

	*d5 = lo + rot2;		// phase 6
	*d3 = lo - rot2;
	*d1 = hi + rot4;
	*d7 = hi - rot4;
}

// Transforms, quantises and entropy-codes one 8x8 block.
//
//   bits - bit writer that receives the encoded block
//   A    - 64 samples in row-major order; overwritten with the DCT output
//   htdc - DC size table to use (s_jo_HTDC_Y or s_jo_HTDC_C)
//   DC   - quantised DC value of the previous block of the same component
//
// Returns this block's quantised DC value, which the caller passes back in
// as DC for the next block of the same component.
static int jo_processDU(jo_bits_t *bits, float A[64], const unsigned char htdc[9][2], int DC) {
	int coeff[64];
	int pos, last;

	// 2-D transform: one pass over each row, then one over each column.
	for (int row = 0; row < 8; ++row) {
		float *r = A + row * 8;
		jo_DCT(r, r + 1, r + 2, r + 3, r + 4, r + 5, r + 6, r + 7);
	}
	for (int col = 0; col < 8; ++col) {
		float *c = A + col;
		jo_DCT(c, c + 8, c + 16, c + 24, c + 32, c + 40, c + 48, c + 56);
	}

	// Scale, round half away from zero, and store in zigzag order.
	for (int n = 0; n < 64; ++n) {
		const float scaled = A[n] * s_jo_quantTbl[n];
		float rounded;
		if (scaled < 0) {
			rounded = ceilf(scaled - 0.5f);
		} else {
			rounded = floorf(scaled + 0.5f);
		}
		coeff[s_jo_ZigZag[n]] = (int)rounded;
	}

	// DC: code the difference to the previous block as a size category
	// (number of significant bits) followed by that many value bits.
	const int dcDiff = coeff[0] - DC;
	int dcMag = dcDiff < 0 ? -dcDiff : dcDiff;
	int dcBits = 0;
	for (int rest = dcMag; rest != 0; rest >>= 1) {
		++dcBits;
	}
	jo_writeBits(bits, htdc[dcBits][0], htdc[dcBits][1]);
	if (dcDiff < 0) {
		// Negative differences are sent with the magnitude bits inverted.
		dcMag ^= (1 << dcBits) - 1;
	}
	jo_writeBits(bits, dcMag, dcBits);

	// Index of the last non-zero AC coefficient (0 if there is none).
	last = 63;
	while (last > 0 && coeff[last] == 0) {
		--last;
	}

	// AC: one (zero run, level) pair per non-zero coefficient.
	pos = 1;
	while (pos <= last) {
		int zeros = 0;
		for (; coeff[pos] == 0 && pos < last; ++pos) {
			++zeros;
		}
		const int level = coeff[pos++];
		const int mag = level < 0 ? -level : level;
		int code = 0, len = 0;

		if (zeros < 32 && mag <= 40) {
			const unsigned char *entry = s_jo_HTAC[zeros][mag - 1];
			code = entry[0];
			len = entry[1];
			if (level < 0) {
				code += 1;
			}
		}
		if (len == 0) {
			// No table code for this pair: 6-bit value 1, 6-bit run, then
			// the level in 8 bits, preceded by an extra byte (128 or 0)
			// when the level does not fit in -127..127.
			jo_writeBits(bits, 1, 6);
			jo_writeBits(bits, zeros, 6);
			if (level < -127) {
				jo_writeBits(bits, 128, 8);
			} else if (level > 127) {
				jo_writeBits(bits, 0, 8);
			}
			code = level & 255;
			len = 8;
		}
		jo_writeBits(bits, code, len);
	}

	// Block terminator: the two bits "10".
	jo_writeBits(bits, 2, 2);

	return coeff[0];
}

// Encodes one frame and appends it to fp as: sequence header, GOP header,
// picture header, a single slice containing every 16x16 macroblock of the
// frame, and a sequence end code.
//
//   fp     - output stream
//   rgbx   - width*height pixels, 4 bytes each, rows stored top to bottom
//            without padding; bytes 0..2 of a pixel are read as R, G, B and
//            byte 3 is ignored
//   width,
//   height - frame size in pixels, 1..4095 each (the sequence header stores
//            each of them in 12 bits). Sizes that are not a multiple of 16
//            are handled by repeating the last column / row.
//   fps    - selects the frame-rate code: <=24, <=25, <=30, <=50, else 60
//
// Returns nothing. The call writes nothing at all when fp or rgbx is NULL or
// when width or height is outside 1..4095, because the header could not
// describe such a frame. Write errors on fp are not detected.
void jo_write_mpeg(FILE *fp, const unsigned char *rgbx, int width, int height, int fps) {
	if (fp == NULL || rgbx == NULL) {
		return;
	}
	if (width < 1 || width > 4095 || height < 1 || height > 4095) {
		return;
	}

	// DC predictors, one per colour component, carried from block to block.
	int lastDCY = 128, lastDCCR = 128, lastDCCB = 128;
	jo_bits_t bits = {fp};

	// Sequence Header
	fwrite("\x00\x00\x01\xB3", 4, 1, fp);
	// 12 bits for width, height
	putc((width>>4)&0xFF, fp);
	putc(((width&0xF)<<4) | ((height>>8) & 0xF), fp);
	putc(height & 0xFF, fp);
	// aspect ratio, framerate
	if(fps <= 24) putc(0x12, fp);
	else if(fps <= 25) putc(0x13, fp);
	else if(fps <= 30) putc(0x15, fp);
	else if(fps <= 50) putc(0x16, fp);
	else putc(0x18, fp); // 60fps
	fwrite("\xFF\xFF\xE0\xA0", 4, 1, fp);

	fwrite("\x00\x00\x01\xB8\x80\x08\x00\x40", 8, 1, fp); // GOP header
	fwrite("\x00\x00\x01\x00\x00\x0C\x00\x00", 8, 1, fp); // PIC header
	fwrite("\x00\x00\x01\x01", 4, 1, fp); // Slice header
	// NOTE(review): presumably the slice's quantiser scale plus a flag bit.
	jo_writeBits(&bits, 0x10, 6);

	const int mbRows = (height+15)/16;
	const int mbCols = (width+15)/16;

	for (int vblock=0; vblock<mbRows; vblock++) {
		for (int hblock=0; hblock<mbCols; hblock++) {
			// Two-bit macroblock header "11", written before every macroblock.
			jo_writeBits(&bits, 3, 2);

			// Convert the 16x16 pixel area to Y, Cb, Cr at full resolution.
			float Y[256], CBx[256], CRx[256];
			for (int i=0; i<256; ++i) {
				int y = vblock*16+(i/16);
				int x = hblock*16+(i&15);
				// Clamp to the frame so partial macroblocks repeat edge pixels.
				x = x >= width ? width-1 : x;
				y = y >= height ? height-1 : y;
				const unsigned char *c = rgbx + ((size_t)y*(size_t)width + (size_t)x)*4;
				float r = c[0], g = c[1], b = c[2];
				Y[i] = (0.59f*g + 0.30f*r + 0.11f*b) * (219.f/255) + 16;
				CBx[i] = (-0.17f*r - 0.33f*g + 0.50f*b) * (224.f/255) + 128;
				CRx[i] = (0.50f*r - 0.42f*g - 0.08f*b) * (224.f/255) + 128;
			}

			// Downsample Cb,Cr (420 format): average each 2x2 group.
			float CB[64], CR[64];
			for (int i=0; i<64; ++i) {
				int j =(i&7)*2 + (i&56)*4;
				CB[i] = (CBx[j] + CBx[j+1] + CBx[j+16] + CBx[j+17]) * 0.25f;
				CR[i] = (CRx[j] + CRx[j+1] + CRx[j+16] + CRx[j+17]) * 0.25f;
			}

			// Four 8x8 luma blocks: top-left, top-right, bottom-left, bottom-right.
			for (int k1=0; k1<2; ++k1) {
				for (int k2=0; k2<2; ++k2) {
					float block[64];
					for (int i=0; i<64; i+=8) {
						// i is the row start in the 8x8 block, j the matching
						// row start inside the 16-wide Y array.
						int j = (i&7)+(i&56)*2 + k1*8*16 + k2*8;
						memcpy(block+i, Y+j, 8*sizeof(Y[0]));
					}
					lastDCY = jo_processDU(&bits, block, s_jo_HTDC_Y, lastDCY);
				}
			}
			// Then one Cb and one Cr block.
			lastDCCB = jo_processDU(&bits, CB, s_jo_HTDC_C, lastDCCB);
			lastDCCR = jo_processDU(&bits, CR, s_jo_HTDC_C, lastDCCR);
		}
	}
	// Pad with zero bits so that a partially filled last byte is written out.
	jo_writeBits(&bits, 0, 7);
	fwrite("\x00\x00\x01\xb7", 4, 1, fp); // End of Sequence
}
#endif

Results #

I encoded a couple of seconds from Big Buck Bunny as a sample: sample.mpeg
Thanks to jsmpeg we can see the output directly in the browser, after muxing it into an MPEG-TS container.

Quality is hardcoded and results in roughly 8 Mbps at a resolution of 684x385. The encoder performs only intra-frame compression, no inter-frame compression, so in its current state it’s more of an image encoder than a video encoder, as it just appends I-frames. Quality measurements are at roughly 27 dB PSNR and 0.9 SSIM. These values showcase the advanced age of MPEG-1…

...Or in other words

Code changes #

Input frame vs Output frame. Side effect of conversion: Increased saturation and contrast.

Unfortunately, the output has increased saturation and contrast. This is due to the RGB -> YCbCr conversion in lines 230 - 232 scaling the final color vectors too much. I fixed this by reverting the color space math changes that happened with the update to v1.02.

Input frame vs Output frame. Reverted to old color math.

I’m not sure why the code change credited to r-lyeh happened, but I guess the video player used at the time handled the color space incorrectly. Both VLC and MPV play back the colors correctly with v1.03.

Addendum #

As clarified by @r-lyeh in the comments, the color space fix was regarding the YUV math. I reinserted the fix, as it was always meant to be, and bumped the version to v1.04. Here is a sample with the correct color math, showing a scene from NeoTokyo°.

The conversion to YCbCr was this in v1.02, scaling the color vectors too much.

  Y[i] = ( 0.299f*r + 0.587f*g + 0.114f*b) * (219.f/255) + 16;
CBx[i] = (-0.299f*r - 0.587f*g + 0.886f*b) * (224.f/255) + 128;
CRx[i] = ( 0.701f*r - 0.587f*g - 0.114f*b) * (224.f/255) + 128;

Here is the same but in v1.03, correct scale but mixed up color components.

  Y[i] = ( 0.59f*r + 0.30f*g + 0.11f*b) * (219.f/255) + 16;
CBx[i] = (-0.17f*r - 0.33f*g + 0.50f*b) * (224.f/255) + 128;
CRx[i] = ( 0.50f*r - 0.42f*g - 0.08f*b) * (224.f/255) + 128;

And finally the new fix, as pointed out by @r-lyeh in v1.04.

  Y[i] = ( 0.59f*g + 0.30f*r + 0.11f*b) * (219.f/255) + 16;
CBx[i] = (-0.17f*r - 0.33f*g + 0.50f*b) * (224.f/255) + 128;
CRx[i] = ( 0.50f*r - 0.42f*g - 0.08f*b) * (224.f/255) + 128;