Native Set vs Binary Search (SHA-256 Hashes)

Benchmark created on


Description

This test compares the performance of looking up values in a native JavaScript Set versus using a binary search algorithm on a sorted array.

The setup code generates 1100 SHA-256 hashes:

Hashes of 1000 random numbers.

Hashes of the integers from 1 to 100 (which will serve as the known keys for our lookups).

This combined array of 1100 hashes is then sorted alphabetically.

Setup

var sha256 = function sha256(ascii) {
    function rightRotate(value, amount) {
        return (value>>>amount) | (value<<(32 - amount));
    };
    
    var mathPow = Math.pow;
    var maxWord = mathPow(2, 32);
    var lengthProperty = 'length'
    var i, j; // Used as a counter across the whole file
    var result = ''

    var words = [];
    var asciiBitLength = ascii[lengthProperty]*8;
    
    //* caching results is optional - remove/add slash from front of this line to toggle
    // Initial hash value: first 32 bits of the fractional parts of the square roots of the first 8 primes
    // (we actually calculate the first 64, but extra values are just ignored)
    var hash = sha256.h = sha256.h || [];
    // Round constants: first 32 bits of the fractional parts of the cube roots of the first 64 primes
    var k = sha256.k = sha256.k || [];
    var primeCounter = k[lengthProperty];
    /*/
    var hash = [], k = [];
    var primeCounter = 0;
    //*/

    var isComposite = {};
    for (var candidate = 2; primeCounter < 64; candidate++) {
        if (!isComposite[candidate]) {
            for (i = 0; i < 313; i += candidate) {
                isComposite[i] = candidate;
            }
            hash[primeCounter] = (mathPow(candidate, .5)*maxWord)|0;
            k[primeCounter++] = (mathPow(candidate, 1/3)*maxWord)|0;
        }
    }
    
    ascii += '\x80' // Append Ƈ' bit (plus zero padding)
    while (ascii[lengthProperty]%64 - 56) ascii += '\x00' // More zero padding
    for (i = 0; i < ascii[lengthProperty]; i++) {
        j = ascii.charCodeAt(i);
        if (j>>8) return; // ASCII check: only accept characters in range 0-255
        words[i>>2] |= j << ((3 - i)%4)*8;
    }
    words[words[lengthProperty]] = ((asciiBitLength/maxWord)|0);
    words[words[lengthProperty]] = (asciiBitLength)
    
    // process each chunk
    for (j = 0; j < words[lengthProperty];) {
        var w = words.slice(j, j += 16); // The message is expanded into 64 words as part of the iteration
        var oldHash = hash;
        // This is now the undefinedworking hash", often labelled as variables a...g
        // (we have to truncate as well, otherwise extra entries at the end accumulate
        hash = hash.slice(0, 8);
        
        for (i = 0; i < 64; i++) {
            var i2 = i + j;
            // Expand the message into 64 words
            // Used below if 
            var w15 = w[i - 15], w2 = w[i - 2];

            // Iterate
            var a = hash[0], e = hash[4];
            var temp1 = hash[7]
                + (rightRotate(e, 6) ^ rightRotate(e, 11) ^ rightRotate(e, 25)) // S1
                + ((e&hash[5])^((~e)&hash[6])) // ch
                + k[i]
                // Expand the message schedule if needed
                + (w[i] = (i < 16) ? w[i] : (
                        w[i - 16]
                        + (rightRotate(w15, 7) ^ rightRotate(w15, 18) ^ (w15>>>3)) // s0
                        + w[i - 7]
                        + (rightRotate(w2, 17) ^ rightRotate(w2, 19) ^ (w2>>>10)) // s1
                    )|0
                );
            // This is only used once, so *could* be moved below, but it only saves 4 bytes and makes things unreadble
            var temp2 = (rightRotate(a, 2) ^ rightRotate(a, 13) ^ rightRotate(a, 22)) // S0
                + ((a&hash[1])^(a&hash[2])^(hash[1]&hash[2])); // maj
            
            hash = [(temp1 + temp2)|0].concat(hash); // We don't bother trimming off the extra ones, they're harmless as long as we're truncating when we do the slice()
            hash[4] = (hash[4] + temp1)|0;
        }
        
        for (i = 0; i < 8; i++) {
            hash[i] = (hash[i] + oldHash[i])|0;
        }
    }
    
    for (i = 0; i < 8; i++) {
        for (j = 3; j + 1; j--) {
            var b = (hash[i]>>(j*8))&255;
            result += ((b < 16) ? 0 : '') + b.toString(16);
        }
    }
    return result;
};

console.log('Preparing data synchronously...');

const totalRandomItems = 1000;
const totalKnownItems = 100;

// These will be accessed by the test cases
var sortedHashes;
var keysToLookup;

const allHashes = [];
const knownKeyHashes = [];

// 1. Generate hashes for random numbers
for (let i = 0; i < totalRandomItems; i++) {
  const randomStr = String(Math.random());
  // Use CryptoJS for synchronous hashing
  const hash = sha256(randomStr);
  allHashes.push(hash);
}

// 2. Generate hashes for known keys (1 to 100)
for (let i = 1; i <= totalKnownItems; i++) {
  const knownKeyStr = String(i);
  const hash = sha256(knownKeyStr);
  // Add to both the main list and the list of keys to look up
  allHashes.push(hash);
  knownKeyHashes.push(hash);
}

// 3. Sort the combined array of hashes
allHashes.sort();

// 4. Assign the prepared data to the global variables for the tests
sortedHashes = allHashes;
keysToLookup = knownKeyHashes;

console.log(`Data preparation complete. Total items: ${sortedHashes.length}`);
console.log('First 3 keys to look up:', keysToLookup.slice(0, 3));
console.log('First 3 sorted hashes:', sortedHashes.slice(0, 3));

Test runner

Ready to run.

Testing in
TestOps/sec
Set#has lookup (includes Set creation)
// This test includes the O(n) cost of creating the Set from the array.
const hashSet = new Set(sortedHashes);
let foundCount = 0;

for (let i = 0; i < keysToLookup.length; i++) {
  if (hashSet.has(keysToLookup[i])) {
    foundCount++;
  }
}
ready
Binary Search lookup
function binarySearch(arr, target) {
  let low = 0;
  let high = arr.length - 1;
  let mid;

  while (low <= high) {
    // Use bitwise shift for a potentially faster floor operation
    mid = (low + high) >> 1;
    const current = arr[mid];

    if (current < target) {
      low = mid + 1;
    } else if (current > target) {
      high = mid - 1;
    } else {
      // Target found
      return mid;
    }
  }
  // Target not found
  return -1;
}

let foundCount = 0;
for (let i = 0; i < keysToLookup.length; i++) {
  if (binarySearch(sortedHashes, keysToLookup[i]) !== -1) {
    foundCount++;
  }
}
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.