Float32Array Matrix Row Swapping

Benchmark created by Paul Evans on


Description

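Tests whether there is any performance gain from unrolling a 4-element matrix row swap, and from other ordering decisions such as swapping elements in reverse order or precomputing the element offsets.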

Setup

// Single 4x4 identity matrix, laid out row by row in a flat array.
var matrix = [
      1.0, 0.0, 0.0, 0.0,
      0.0, 1.0, 0.0, 0.0,
      0.0, 0.0, 1.0, 0.0,
      0.0, 0.0, 0.0, 1.0
    ];

    // The same matrix with its last two rows exchanged; test() compares against this.
    var expectedMatrix = [
      1.0, 0.0, 0.0, 0.0,
      0.0, 1.0, 0.0, 0.0,
      0.0, 0.0, 0.0, 1.0,
      0.0, 0.0, 1.0, 0.0
    ];

    // Pack 100 copies of the identity matrix (1600 / 16) back to back in one array.
    var len = 1600;
    var arr = new Array(len);

    var i = 0;
    while (i < arr.length) {
      // i % 16 walks the 16 entries of the template matrix over and over.
      arr[i] = matrix[i % 16];
      i++;
    }

    // Typed-array copy of the packed matrices.
    var float32array = new Float32Array(arr);
    
    function swapRow4Float32(matrix, row0Offset, row1Offset) {
      var tmp = 0.0,
        j = 0,
        k = 0;
    
      // Matrix stored as long array by row,
      // so iterating through a row is just a case of computing an offset and end point.
      for (var i = 0; i < 4; i++) {
        j = row0Offset + i;
        k = row1Offset + i;
        tmp = matrix[k];
        matrix[k] = matrix[j];
        matrix[j] = tmp;
      }
    };
    
    function swapRow4Float32r(matrix, row0Offset, row1Offset) {
      var tmp = 0.0,
        j = 0,
        k = 0;
    
      // Matrix stored as long array by row,
      // so iterating through a row is just a case of computing an offset and end point.
      for (var i = 3; i >= 0; i--) {
        j = row0Offset + i;
        k = row1Offset + i;
        tmp = matrix[k];
        matrix[k] = matrix[j];
        matrix[j] = tmp;
      }
    };
    
    function swapRow4Float32Unrolled(matrix, row0Offset, row1Offset) {
      var tmp = 0.0;
    
      // 0
      tmp = matrix[row1Offset];
      matrix[row1Offset] = matrix[row0Offset];
      matrix[row0Offset] = tmp;
    
      // 1
      tmp = matrix[row1Offset + 1];
      matrix[row1Offset] = matrix[row0Offset + 1];
      matrix[row0Offset + 1] = tmp;
    
      // 2
      tmp = matrix[row1Offset + 2];
      matrix[row1Offset + 2] = matrix[row0Offset + 2];
      matrix[row0Offset + 2] = tmp;
    
      // 3
      tmp = matrix[row1Offset + 3];
      matrix[row1Offset + 3] = matrix[row0Offset + 3];
      matrix[row0Offset + 3] = tmp;
    };
    
    function swapRow4Float32UnrolledOff(matrix, row0Offset, row1Offset) {
      var tmp = 0.0,
        i = 0,
        j = 0;
    
      // 0
      tmp = matrix[row1Offset];
      matrix[row1Offset] = matrix[row0Offset];
      matrix[row0Offset] = tmp;
    
      // 1
      i = row0Offset + 1;
      j = row1Offset + 1;
      tmp = matrix[j];
      matrix[j] = matrix[i];
      matrix[i] = tmp;
    
      // 2
      i = row0Offset + 2;
      j = row1Offset + 2;
      tmp = matrix[j];
      matrix[j] = matrix[i];
      matrix[i] = tmp;
    
      // 3
      i = row0Offset + 3;
      j = row1Offset + 3;
      tmp = matrix[j];
      matrix[j] = matrix[i];
      matrix[i] = tmp;
    };
    
    function swapRow4Float32UnrolledRev(matrix, row0Offset, row1Offset) {
      var tmp = 0.0;
      // 3
      tmp = matrix[row1Offset + 3];
      matrix[row1Offset + 3] = matrix[row0Offset + 3];
      matrix[row0Offset + 3] = tmp;
    
      // 2
      tmp = matrix[row1Offset + 2];
      matrix[row1Offset + 2] = matrix[row0Offset + 2];
      matrix[row0Offset + 2] = tmp;
    
      // 1
      tmp = matrix[row1Offset + 1];
      matrix[row1Offset] = matrix[row0Offset + 1];
      matrix[row0Offset + 1] = tmp;
    
      // 0
      tmp = matrix[row1Offset];
      matrix[row1Offset] = matrix[row0Offset];
      matrix[row0Offset] = tmp;
    };
    
    function swapRow4Float32UnrolledRevOff(matrix, row0Offset, row1Offset) {
      var tmp = 0.0,
        i = 0,
        j = 0;
    
      // 3
      i = row0Offset + 3;
      j = row1Offset + 3;
      tmp = matrix[j];
      matrix[j] = matrix[i];
      matrix[i] = tmp;
    
      // 2
      i = row0Offset + 2;
      j = row1Offset + 2;
      tmp = matrix[j];
      matrix[j] = matrix[i];
      matrix[i] = tmp;
    
      // 1
      i = row0Offset + 1;
      j = row1Offset + 1;
      tmp = matrix[j];
      matrix[j] = matrix[i];
      matrix[i] = tmp;
    
      // 0
      tmp = matrix[row1Offset];
      matrix[row1Offset] = matrix[row0Offset];
      matrix[row0Offset] = tmp;
    };
    
    function swapAllRow4Float32() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        swapRow4Float32(float32array, i + 8, i + 12);
      }
    };
    
    function swapAllRow4Float32r() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        swapRow4Float32r(float32array, i + 8, i + 12);
      }
    };
    
    function swapAllRow4Float32Unrolled() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        swapRow4Float32Unrolled(float32array, i + 8, i + 12);
      }
    };
    
    function swapAllRow4Float32UnrolledOff() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        swapRow4Float32UnrolledOff(float32array, i + 8, i + 12);
      }
    };
    
    function swapAllRow4Float32UnrolledRev() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        swapRow4Float32UnrolledRev(float32array, i + 8, i + 12);
      }
    };
    
    function swapAllRow4Float32UnrolledRevOff() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        swapRow4Float32UnrolledRevOff(float32array, i + 8, i + 12);
      }
    };
    
    function test() {
      var len = float32array.length;
      for (var i = 0; i < len; i = i + 16) {
        if (!(float32array[i] == expectedMatrix[0] &&
          float32array[i + 1] == expectedMatrix[1] &&
          float32array[i + 2] == expectedMatrix[2] &&
          float32array[i + 3] == expectedMatrix[3] &&
          float32array[i + 4] == expectedMatrix[4] &&
          float32array[i + 5] == expectedMatrix[5] &&
          float32array[i + 6] == expectedMatrix[6] &&
          float32array[i + 7] == expectedMatrix[7] &&
          float32array[i + 8] == expectedMatrix[8] &&
          float32array[i + 9] == expectedMatrix[9] &&
          float32array[i + 10] == expectedMatrix[10] &&
          float32array[i + 11] == expectedMatrix[11] &&
          float32array[i + 12] == expectedMatrix[12] &&
          float32array[i + 13] == expectedMatrix[13] &&
          float32array[i + 14] == expectedMatrix[14] &&
          float32array[i + 15] == expectedMatrix[15])) {
    
          console.log("i=" + i);
          console.log(float32array[0 + 0 + i] + " " + float32array[0 + 1 + i] + " " + float32array[0 + 2 + i] + " " + float32array[0 + 3 + i]);
          console.log(float32array[4 + 0 + i] + " " + float32array[4 + 1 + i] + " " + float32array[4 + 2 + i] + " " + float32array[4 + 3 + i]);
          console.log(float32array[8 + 0 + i] + " " + float32array[8 + 1 + i] + " " + float32array[8 + 2 + i] + " " + float32array[8 + 3 + i]);
          console.log(float32array[12 + 0 + i] + " " + float32array[12 + 1 + i] + " " + float32array[12 + 2 + i] + " " + float32array[12 + 3 + i]);
          console.log("failed");
          break;
        }
      }
    
      console.log("finished");
    };
    
    //swapAllRow4Float32();
    //swapAllRow4Float32r();
    //swapAllRow4Float32Unrolled();
    //swapAllRow4Float32UnrolledOff();
    //swapAllRow4Float32UnrolledRev();
    //swapAllRow4Float32UnrolledRevOff();
    //test();

Test runner

Ready to run.

Testing in
Test | Ops/sec
base loop
swapAllRow4Float32();
ready
unrolled
swapAllRow4Float32Unrolled();
ready
+offset
swapAllRow4Float32UnrolledOff();
ready
+rev
swapAllRow4Float32UnrolledRev();
ready
+rev +offset
swapAllRow4Float32UnrolledRevOff();
ready
base rev
swapAllRow4Float32r();
ready

Revisions

You can edit these tests or add more tests to this page by appending /edit to the URL.

  • Revision 1: published by Paul Evans on