popcount-buffer

JavaScript performance comparison

Test case created by blake-regalia

Preparation code


      
      <script>
Benchmark.prototype.setup = function() {
  const NB_DATA = 2 << 19;  // 1 MiB
  const AT_DATA = new Uint8Array(NB_DATA);
  
  function popcount_uint32(x) {
  	x -= (x >>> 1) & 0x55555555;
  	x = (x & 0x33333333) + ((x >>> 2) & 0x33333333);
  	return ((x + (x >>> 4) & 0xf0f0f0f) * 0x1010101) >>> 24;
  }
  
  function popcount_uint16(x) {
  	x -= (x >>> 1) & 0x5555;
  	x = (x & 0x3333) + ((x >>> 2) & 0x3333);
  	return ((x + (x >>> 4) & 0xf0f) * 0x1010101) >>> 24;
  }
  
  const ATU8_POPCOUNTS_8 = new Uint8Array(1 << 8);
  {
  	for(let x=0; x<(1 << 9); x++) {
  		ATU8_POPCOUNTS_8[x] = popcount_uint32(x);
  	}
  }
  
  const ATU8_POPCOUNTS_14 = new Uint8Array(1 << 14);
  {
  	for(let x=0; x<(1 << 14); x++) {
  		ATU8_POPCOUNTS_14[x] = popcount_uint32(x);
  	}
  }
  
  const ATU8_POPCOUNTS_15 = new Uint8Array(1 << 15);
  {
  	for(let x=0; x<(1 << 15); x++) {
  		ATU8_POPCOUNTS_15[x] = popcount_uint32(x);
  	}
  }
  
  const ATU8_POPCOUNTS_16 = new Uint8Array(1 << 16);
  {
  	for(let x=0; x<(1 << 16); x++) {
  		ATU8_POPCOUNTS_16[x] = popcount_uint32(x);
  	}
  }
  
  const ATU8_POPCOUNTS_16_PACKED_HI = new Uint8Array(1 << 15);
  {
  	for(let x=0; x<(1 << 16); x+=2) {
  		ATU8_POPCOUNTS_16_PACKED_HI[x/2] = (popcount_uint32(x) << 4) | popcount_uint32(x+1);
  	}
  
  	ATU8_POPCOUNTS_16_PACKED_HI[(1 << 15)-1] = 15 << 4;
  }
  
  const ATU8_POPCOUNTS_16_PACKED_LO = new Uint8Array(1 << 15);
  {
  	for(let x=1; x<(1 << 16); x+=2) {
  		ATU8_POPCOUNTS_16_PACKED_LO[(x-1)/2] = ((popcount_uint32(x)-1) << 4) | (popcount_uint32(x+1) - 1);
  	}
  
  	ATU8_POPCOUNTS_16_PACKED_LO[(1 << 15)-1] = (14 << 4) | 15;
  }
  
  function popcount_uint16_cache14(x) {
  	if(x < 0x8000) {
  		if(x < 0x4000) {
  			return ATU8_POPCOUNTS_14[x];
  		}
  		else {
  			return ATU8_POPCOUNTS_14[x & 0x3fff] + 1;
  		}
  	}
  	else if(x < 0xc000) {
  		return ATU8_POPCOUNTS_14[x & 0x3fff] + 1;
  	}
  	else {
  		return ATU8_POPCOUNTS_14[x & 0x3fff] + 2;
  	}
  }
  
  function popcount_uint16_cache15(x) {
  	return ATU8_POPCOUNTS_15[x & 0x7fff] + (x >= 0x8000);
  }
  
  function popcount_uint16_cache16(x) {
  	return ATU8_POPCOUNTS_16[x];
  }
  
  function popcount_uint16_cache16_packed_hi(x) {
  	let xp = ATU8_POPCOUNTS_16_PACKED_HI[x>>>1];
  	if(x % 2) return 0xfffe < x? 16: xp & 0xf;
  	return xp >>> 4;
  }
  
  function popcount_uint16_cache16_packed_lo(x) {
  	if(x % 2) return (ATU8_POPCOUNTS_16_PACKED_LO[x>>>1] >>> 4) + 1;
  	return x? (ATU8_POPCOUNTS_16_PACKED_LO[x>>>1] & 0xf) + 1: 0;
  }
  
  function popcount_uint32_cache8(x) {
  	return ATU8_POPCOUNTS_8[x & 0xff]
  		+ ATU8_POPCOUNTS_8[(x >>> 8) & 0xff]
  		+ ATU8_POPCOUNTS_8[(x >>> 16) & 0xff]
  		+ ATU8_POPCOUNTS_8[x >>> 24];
  }
  
  function popcount_uint32_cache14(x) {
  	if(x < 0x8000) {
  		if(x < 0x4000) {
  			return ATU8_POPCOUNTS_14[x];
  		}
  		else {
  			return ATU8_POPCOUNTS_14[x & 0x3fff] + 1;
  		}
  	}
  	else if(x < 0xc000) {
  		return ATU8_POPCOUNTS_14[x & 0x3fff] + 1;
  	}
  	else {
  		return ATU8_POPCOUNTS_14[x & 0x3fff] + 2;
  	}
  }
  
  function popcount_uint32_cache15(x) {
  	// let x_hi = ((x / 0x20000) | 0);
  	let x_hi = x >>> 17;
  	let x_mid = (x & 0x18000) >>> 15;
  	return ATU8_POPCOUNTS_15[x & 0x7fff]
  		+ ATU8_POPCOUNTS_15[x_hi & 0x7fff]
  		+ (x_mid % 2) + (x_mid >>> 1);
  }
  
  function popcount_uint32_cache16(x) {
  	// return ATU8_POPCOUNTS_16[x&0xffff] + ATU8_POPCOUNTS_16[(x / 0x10000) | 0];
  	return ATU8_POPCOUNTS_16[x & 0xffff] + ATU8_POPCOUNTS_16[x >>> 16];
  }
  
  
  {
  	class MersenneTwister {
  		constructor(seed=5489) {
  			let state = this.state = new Array(624);
  			let next;
  			// if no seed is given, use default value 5489
  			this.seed = seed;
  
  			// fill initial state
  			state[0] = seed;
  			for(let i=1; i<624; i++) {
  				let s = state[i-1] ^ (state[i-1] >>> 30);
  				// avoid multiplication overflow: split 32 bits into 2x 16 bits and process them individually
  				state[i] = (((((s & 0xffff0000) >>> 16) * 1812433253) << 16)
  					+ (s & 0x0000ffff) * 1812433253) + i;
  				// convert to 32 bit unsigned int
  				state[i] |= 0;
  			}
  
  			// twist'n'shout
  			this.twist();
  		}
  
  		// private function: create new state (based on old one)
  		twist() {
  			let state = this.state;
  
  			// first 624-397=227 words
  			for(let i = 0; i < 227; i++) {
  				let bits = (state[i] & 0x80000000) | (state[i + 1] & 0x7fffffff);
  				state[i] = state[i + 397] ^ (bits >>> 1) ^ ((bits & 1) * 0x9908b0df);
  			}
  			// remaining words (except the very last one)
  			for(let i = 227 ; i < 623; i++) {
  				let bits = (state[i] & 0x80000000) | (state[i + 1] & 0x7fffffff);
  				state[i] = state[i - 227] ^ (bits >>> 1) ^ ((bits & 1) * 0x9908b0df);
  			}
  			// last word is computed pretty much the same way, but i + 1 must wrap around to 0
  			let bits = (state[623] & 0x80000000) | (state[0] & 0x7fffffff);
  			state[623] = state[396] ^ (bits >>> 1) ^ ((bits & 1) * 0x9908b0df);
  
  			// word used for next random number
  			this.next = 0;
  		}
  
  		// public function: return a random 32 bit number
  		random() {
  			// compute new state ?
  			if(this.next >= 624) this.twist();
  
  			// shuffle bits around
  			let x = this.state[this.next++];
  			x ^= x >>> 11;
  			x ^= (x << 7) & 0x9d2c5680;
  			x ^= (x << 15) & 0xefc60000;
  			x ^= x >>> 18;
  			return x;
  		}
  	}
  
  	let k_twister = new MersenneTwister();
  	let atu32_data = new Uint32Array(AT_DATA.buffer, AT_DATA.byteOffset, AT_DATA.byteLength >> 2);
  	let ntu32_data = atu32_data.length;
  	for(let it_scan=0; it_scan<ntu32_data; it_scan++) {
  		atu32_data[it_scan] = k_twister.random();
  	}
  }
  
  function test_uint16(f_popcount) {
  	let c_pop = 0;
  	let atu16_data = new Uint16Array(AT_DATA.buffer, AT_DATA.byteOffset, AT_DATA.byteLength >> 1);
  	let ntu16_data = atu16_data.length;
  	for(let it_scan=0; it_scan<ntu16_data; it_scan++) {
  		c_pop += f_popcount(atu16_data[it_scan]);
  	}
  	return c_pop;
  }
  
  
  function test_uint32(f_popcount) {
  	let c_pop = 0;
  	let atu32_data = new Uint32Array(AT_DATA.buffer, AT_DATA.byteOffset, AT_DATA.byteLength >> 2);
  	let ntu32_data = atu32_data.length;
  	for(let it_scan=0; it_scan<ntu32_data; it_scan++) {
  		c_pop += f_popcount(atu32_data[it_scan]);
  	}
  	return c_pop;
  }
  

};
</script>

Test runner

Warning! For accurate results, please disable Firebug before running the tests. (Why?)

Java applet disabled.

Testing in CCBot 2.0.0 / Other 0.0.0
Test Ops/sec
16-bit Reads, Popcount Uint16 (No Cache)
// 0 KiB of cache overhead
test_uint16(popcount_uint16);
pending…
16-bit Reads, Popcount Uint32 (No Cache)
// 0 KiB of cache overhead
test_uint16(popcount_uint32);
pending…
16-bit Reads, 14-bit Cache Plain
// 16 KiB of cache overhead
test_uint16(popcount_uint16_cache14);
pending…
16-bit Reads, 15-bit Cache Plain
// 32 KiB of cache overhead
test_uint16(popcount_uint16_cache15);
pending…
16-bit Reads, 16-bit Cache Packed (Offset Lo)
// 32 KiB of cache overhead
test_uint16(popcount_uint16_cache16_packed_lo);
pending…
16-bit Reads, 16-bit Cache Packed (Offset Hi)
// 32 KiB of cache overhead
test_uint16(popcount_uint16_cache16_packed_hi);
pending…
16-bit Reads, 16-bit Cache Plain
// 64 KiB of cache overhead
test_uint16(popcount_uint16_cache16);
pending…
32-bit Reads, Popcount Uint32 (No Cache)
// 0 KiB of cache overhead
test_uint32(popcount_uint32);
pending…
32-bit Reads, 8-bit Cache Plain
// 0.25 KiB of cache overhead
test_uint32(popcount_uint32_cache8);
pending…
32-bit Reads, 15-bit Cache Plain (Twiddle Mid)
// 32 KiB of cache overhead
test_uint32(popcount_uint32_cache15);
pending…
32-bit Reads, 16-bit Cache Plain
// 64 KiB of cache overhead
test_uint32(popcount_uint32_cache16);
pending…

You can edit these tests or add even more tests to this page by appending /edit to the URL.

Compare results of other browsers

0 Comments