
/* autogenerated from schroorc.orc */

#include <orc/orc.h>
#include <orc-test/orctest.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

/*
 * Fixed-width integer typedefs (orc_int8 ... orc_uint64), the ORC_UINT64_C
 * literal helper and the orc_union16/32/64 overlay unions.  The whole group
 * is guarded by _ORC_INTEGER_TYPEDEFS_ so it is only defined once.
 */
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
/* C99 or later: take the exact-width types straight from <stdint.h>. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
/* MSVC: use the compiler's sized __intN keywords. */
#elif defined(_MSC_VER)
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
/* Anything else: map onto the basic C types. */
#else
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
/* When long is no wider than int, long long is used for the 64-bit types. */
#if INT_MAX == LONG_MAX
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* Unions that overlay one wide value with arrays of narrower lanes. */
typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
#endif

/* begin Orc C target preamble */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX 65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
/* end Orc C target preamble */


/* orc_add2_rshift_add_s16_22_op */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[4][i] + (((arrays[5][i] + arrays[6][i]) + 2) >> 2)
 *
 * Every intermediate result is narrowed back to 16 bits before the next
 * step, exactly as the individual word opcodes would do.
 */
static void
_backup_orc_add2_rshift_add_s16_22_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[6];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;   /* addw */
    orc_int16 rounded = pair + 2;               /* addw with constant 2 */
    orc_int16 quarter = rounded >> 2;           /* shrsw */

    dst[k].i = base[k].i + quarter;             /* addw, storew */
  }
}

/* orc_add2_rshift_add_s16_22 */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] += ((arrays[4][i] + arrays[5][i]) + 2) >> 2
 *
 * Every intermediate result is narrowed back to 16 bits before the next
 * step.
 */
static void
_backup_orc_add2_rshift_add_s16_22 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;   /* addw */
    orc_int16 rounded = pair + 2;               /* addw with constant 2 */
    orc_int16 quarter = rounded >> 2;           /* shrsw */

    acc[k].i = acc[k].i + quarter;              /* loadw, addw, storew */
  }
}

/* orc_add2_rshift_sub_s16_22_op */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[4][i] - (((arrays[5][i] + arrays[6][i]) + 2) >> 2)
 *
 * Every intermediate result is narrowed back to 16 bits before the next
 * step.
 */
static void
_backup_orc_add2_rshift_sub_s16_22_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[6];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;   /* addw */
    orc_int16 rounded = pair + 2;               /* addw with constant 2 */
    orc_int16 quarter = rounded >> 2;           /* shrsw */

    dst[k].i = base[k].i - quarter;             /* subw, storew */
  }
}

/* orc_add2_rshift_sub_s16_22 */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] -= ((arrays[4][i] + arrays[5][i]) + 2) >> 2
 *
 * Every intermediate result is narrowed back to 16 bits before the next
 * step.
 */
static void
_backup_orc_add2_rshift_sub_s16_22 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;   /* addw */
    orc_int16 rounded = pair + 2;               /* addw with constant 2 */
    orc_int16 quarter = rounded >> 2;           /* shrsw */

    acc[k].i = acc[k].i - quarter;              /* loadw, subw, storew */
  }
}

/* orc_add2_rshift_add_s16_11_op */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[4][i] + ((arrays[5][i] + arrays[6][i] + 1) >> 1)
 *
 * The rounded average is computed at int width and then narrowed to
 * 16 bits before it is added.
 */
static void
_backup_orc_add2_rshift_add_s16_11_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[6];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 mean = (tap_a[k].i + tap_b[k].i + 1)>>1;   /* avgsw */

    dst[k].i = base[k].i + mean;                         /* addw, storew */
  }
}

/* orc_add2_rshift_add_s16_11 */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] += (arrays[4][i] + arrays[5][i] + 1) >> 1
 *
 * The rounded average is computed at int width and then narrowed to
 * 16 bits before it is added.
 */
static void
_backup_orc_add2_rshift_add_s16_11 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 mean = (tap_a[k].i + tap_b[k].i + 1)>>1;   /* avgsw */

    acc[k].i = acc[k].i + mean;                          /* loadw, addw, storew */
  }
}

/* orc_add2_rshift_sub_s16_11_op */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[4][i] - ((arrays[5][i] + arrays[6][i] + 1) >> 1)
 *
 * The rounded average is computed at int width and then narrowed to
 * 16 bits before it is subtracted.
 */
static void
_backup_orc_add2_rshift_sub_s16_11_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[6];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 mean = (tap_a[k].i + tap_b[k].i + 1)>>1;   /* avgsw */

    dst[k].i = base[k].i - mean;                         /* subw, storew */
  }
}

/* orc_add2_rshift_sub_s16_11 */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] -= (arrays[4][i] + arrays[5][i] + 1) >> 1
 *
 * The rounded average is computed at int width and then narrowed to
 * 16 bits before it is subtracted.
 */
static void
_backup_orc_add2_rshift_sub_s16_11 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 mean = (tap_a[k].i + tap_b[k].i + 1)>>1;   /* avgsw */

    acc[k].i = acc[k].i - mean;                          /* loadw, subw, storew */
  }
}

/* orc_add_const_rshift_s16_11 */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = (arrays[4][i] + 1) >> 1
 *
 * The sum is narrowed to 16 bits before the arithmetic right shift.
 */
static void
_backup_orc_add_const_rshift_s16_11 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 bumped = src[k].i + 1;   /* addw with constant 1 */

    dst[k].i = bumped >> 1;            /* shrsw, storew */
  }
}

/* orc_add_const_rshift_s16 */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = (arrays[0][i] + params[24]) >> params[25]
 *
 * params[24] is narrowed to 16 bits once up front, and the sum is narrowed
 * to 16 bits before the arithmetic right shift.
 */
static void
_backup_orc_add_const_rshift_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const orc_int16 addend = ex->params[24];   /* loadpw */
  const int shift = ex->params[25];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 bumped = data[k].i + addend;   /* addw */

    data[k].i = bumped >> shift;             /* shrsw, storew */
  }
}

/* orc_add_s16 */
/*
 * Plain-C fallback.  Element-wise 16-bit addition over ex->n elements:
 *
 *   arrays[0][i] = arrays[4][i] + arrays[5][i]
 *
 * The sum is narrowed to 16 bits when stored.
 */
static void
_backup_orc_add_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT lhs = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT rhs = (orc_union16 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    dst[k].i = lhs[k].i + rhs[k].i;   /* addw, storew */
  }
}

/* orc_add_s16_2d */
/*
 * Plain-C fallback, two-dimensional in-place addition.  For each of the
 * params[ORC_VAR_A1] rows and each of the ex->n elements in a row:
 *
 *   row0[i] += row4[i]
 *
 * Row j of array 0 starts params[0] * j bytes after arrays[0]; row j of
 * array 4 starts params[4] * j bytes after arrays[4].
 */
static void
_backup_orc_add_s16_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int row;

  for (row = 0; row < rows; row++) {
    orc_union16 * ORC_RESTRICT acc = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_union16 * ORC_RESTRICT src = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    int col;

    for (col = 0; col < width; col++) {
      acc[col].i = acc[col].i + src[col].i;   /* addw, storew */
    }
  }
}

/* orc_addc_rshift_s16 */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = (arrays[4][i] + arrays[5][i]) >> params[24]
 *
 * The sum is narrowed to 16 bits before the arithmetic right shift.
 */
static void
_backup_orc_addc_rshift_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT lhs = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT rhs = (orc_union16 *)ex->arrays[5];
  const int shift = ex->params[24];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 total = lhs[k].i + rhs[k].i;   /* addw */

    dst[k].i = total >> shift;               /* shrsw, storew */
  }
}

/* orc_lshift1_s16 */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[4][i] << 1
 *
 * The shift is applied to the unsigned 16-bit image of the input.
 * Left-shifting a negative signed value is undefined behaviour in C, and
 * negative inputs are ordinary data here; shifting the unsigned image and
 * narrowing the result back to 16 bits yields the expected wrapped word.
 */
static void
_backup_orc_lshift1_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    dst[k].i = ((orc_uint16)src[k].i) << 1;   /* shlw, storew */
  }
}

/* orc_lshift2_s16 */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[4][i] << 2
 *
 * The shift is applied to the unsigned 16-bit image of the input.
 * Left-shifting a negative signed value is undefined behaviour in C, and
 * negative inputs are ordinary data here; shifting the unsigned image and
 * narrowing the result back to 16 bits yields the expected wrapped word.
 */
static void
_backup_orc_lshift2_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    dst[k].i = ((orc_uint16)src[k].i) << 2;   /* shlw, storew */
  }
}

/* orc_lshift_s16_ip */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   arrays[0][i] = arrays[0][i] << params[24]
 *
 * The shift is applied to the unsigned 16-bit image of the input.
 * Left-shifting a negative signed value is undefined behaviour in C, and
 * negative inputs are ordinary data here; shifting the unsigned image and
 * narrowing the result back to 16 bits yields the expected wrapped word.
 * The shift count itself is not validated.
 */
static void
_backup_orc_lshift_s16_ip (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const int shift = ex->params[24];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    data[k].i = ((orc_uint16)data[k].i) << shift;   /* shlw, storew */
  }
}

/* orc_mas2_add_s16_op */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   pair         = arrays[5][i] + arrays[6][i]            (16-bit)
 *   scaled       = pair * params[24]                      (16x16 -> 32-bit)
 *   biased       = scaled + params[25]                    (32-bit, wrapping)
 *   arrays[0][i] = arrays[4][i] + (16-bit)(biased >> params[26])
 *
 * params[24] is narrowed to 16 bits before use.  The 32-bit add is done in
 * unsigned arithmetic: params[25] is caller-supplied, so a signed add could
 * overflow, which is undefined behaviour in C.  The unsigned form wraps.
 */
static void
_backup_orc_mas2_add_s16_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[6];
  const orc_int16 weight = ex->params[24];   /* loadpw */
  const orc_int32 offset = ex->params[25];   /* loadpl */
  const int shift = ex->params[26];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;                  /* addw */
    orc_int32 scaled = pair * weight;                          /* mulswl */
    orc_int32 biased = (orc_int32)((orc_uint32)scaled + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    dst[k].i = base[k].i + delta;                              /* addw, storew */
  }
}

/* orc_mas2_add_s16_ip */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   pair         = arrays[4][i] + arrays[5][i]            (16-bit)
 *   scaled       = pair * params[24]                      (16x16 -> 32-bit)
 *   biased       = scaled + params[25]                    (32-bit, wrapping)
 *   arrays[0][i] += (16-bit)(biased >> params[26])
 *
 * params[24] is narrowed to 16 bits before use.  The 32-bit add is done in
 * unsigned arithmetic: params[25] is caller-supplied, so a signed add could
 * overflow, which is undefined behaviour in C.  The unsigned form wraps.
 */
static void
_backup_orc_mas2_add_s16_ip (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[5];
  const orc_int16 weight = ex->params[24];   /* loadpw */
  const orc_int32 offset = ex->params[25];   /* loadpl */
  const int shift = ex->params[26];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;                  /* addw */
    orc_int32 scaled = pair * weight;                          /* mulswl */
    orc_int32 biased = (orc_int32)((orc_uint32)scaled + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    acc[k].i = acc[k].i + delta;                               /* loadw, addw, storew */
  }
}

/* orc_mas2_sub_s16_op */
/*
 * Plain-C fallback.  For each of the ex->n 16-bit elements:
 *
 *   pair         = arrays[5][i] + arrays[6][i]            (16-bit)
 *   scaled       = pair * params[24]                      (16x16 -> 32-bit)
 *   biased       = scaled + params[25]                    (32-bit, wrapping)
 *   arrays[0][i] = arrays[4][i] - (16-bit)(biased >> params[26])
 *
 * params[24] is narrowed to 16 bits before use.  The 32-bit add is done in
 * unsigned arithmetic: params[25] is caller-supplied, so a signed add could
 * overflow, which is undefined behaviour in C.  The unsigned form wraps.
 */
static void
_backup_orc_mas2_sub_s16_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[6];
  const orc_int16 weight = ex->params[24];   /* loadpw */
  const orc_int32 offset = ex->params[25];   /* loadpl */
  const int shift = ex->params[26];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;                  /* addw */
    orc_int32 scaled = pair * weight;                          /* mulswl */
    orc_int32 biased = (orc_int32)((orc_uint32)scaled + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    dst[k].i = base[k].i - delta;                              /* subw, storew */
  }
}

/* orc_mas2_sub_s16_ip */
/*
 * Plain-C fallback, in-place form.  For each of the ex->n 16-bit elements:
 *
 *   pair         = arrays[4][i] + arrays[5][i]            (16-bit)
 *   scaled       = pair * params[24]                      (16x16 -> 32-bit)
 *   biased       = scaled + params[25]                    (32-bit, wrapping)
 *   arrays[0][i] -= (16-bit)(biased >> params[26])
 *
 * params[24] is narrowed to 16 bits before use.  The 32-bit add is done in
 * unsigned arithmetic: params[25] is caller-supplied, so a signed add could
 * overflow, which is undefined behaviour in C.  The unsigned form wraps.
 */
static void
_backup_orc_mas2_sub_s16_ip (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT tap_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT tap_b = (orc_union16 *)ex->arrays[5];
  const orc_int16 weight = ex->params[24];   /* loadpw */
  const orc_int32 offset = ex->params[25];   /* loadpl */
  const int shift = ex->params[26];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 pair = tap_a[k].i + tap_b[k].i;                  /* addw */
    orc_int32 scaled = pair * weight;                          /* mulswl */
    orc_int32 biased = (orc_int32)((orc_uint32)scaled + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    acc[k].i = acc[k].i - delta;                               /* loadw, subw, storew */
  }
}

/* orc_mas4_across_add_s16_1991_op */
/*
 * Plain-C fallback for a four-input weighted sum with weights
 * (-1, 9, 9, -1) taken from four separate arrays.  For each of the ex->n
 * 16-bit elements:
 *
 *   inner        = arrays[6][i] + arrays[7][i]            (16-bit)
 *   outer        = arrays[5][i] + arrays[8][i]            (16-bit)
 *   acc          = inner * 9 - outer                      (32-bit)
 *   biased       = acc + params[24]                       (32-bit, wrapping)
 *   arrays[0][i] = arrays[4][i] + (16-bit)(biased >> params[25])
 *
 * The subtraction cannot overflow 32 bits (|inner * 9| < 2^19), but
 * params[24] is caller-supplied, so the final 32-bit add is done in
 * unsigned arithmetic to avoid signed-overflow undefined behaviour.
 */
static void
_backup_orc_mas4_across_add_s16_1991_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT outer_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT inner_a = (orc_union16 *)ex->arrays[6];
  const orc_union16 * ORC_RESTRICT inner_b = (orc_union16 *)ex->arrays[7];
  const orc_union16 * ORC_RESTRICT outer_b = (orc_union16 *)ex->arrays[8];
  const orc_int16 nine = 9;                  /* loadpw constant */
  const orc_int32 offset = ex->params[24];   /* loadpl */
  const int shift = ex->params[25];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 inner = inner_a[k].i + inner_b[k].i;             /* addw */
    orc_int32 inner9 = inner * nine;                           /* mulswl */
    orc_int16 outer = outer_a[k].i + outer_b[k].i;             /* addw */
    orc_int32 acc = inner9 - (orc_int32)outer;                 /* convswl, subl */
    orc_int32 biased = (orc_int32)((orc_uint32)acc + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    dst[k].i = base[k].i + delta;                              /* addw, storew */
  }
}

/* orc_mas4_across_add_s16_1991_ip */
/*
 * Plain-C fallback for a four-tap weighted sum with weights (-1, 9, 9, -1)
 * taken from consecutive elements of one array, accumulated in place.
 * For each of the ex->n 16-bit elements:
 *
 *   inner        = arrays[4][i + 1] + arrays[4][i + 2]    (16-bit)
 *   outer        = arrays[4][i]     + arrays[4][i + 3]    (16-bit)
 *   acc          = inner * 9 - outer                      (32-bit)
 *   biased       = acc + params[24]                       (32-bit, wrapping)
 *   arrays[0][i] += (16-bit)(biased >> params[25])
 *
 * Note that arrays[4] is read up to index n + 2, so it must hold at least
 * n + 3 elements.
 *
 * The subtraction cannot overflow 32 bits (|inner * 9| < 2^19), but
 * params[24] is caller-supplied, so the final 32-bit add is done in
 * unsigned arithmetic to avoid signed-overflow undefined behaviour.
 */
static void
_backup_orc_mas4_across_add_s16_1991_ip (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc_row = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT taps = (orc_union16 *)ex->arrays[4];
  const orc_int16 nine = 9;                  /* loadpw constant */
  const orc_int32 offset = ex->params[24];   /* loadpl */
  const int shift = ex->params[25];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 inner = taps[k + 1].i + taps[k + 2].i;           /* loadoffw x2, addw */
    orc_int32 inner9 = inner * nine;                           /* mulswl */
    orc_int16 outer = taps[k].i + taps[k + 3].i;               /* loadw, loadoffw, addw */
    orc_int32 acc = inner9 - (orc_int32)outer;                 /* convswl, subl */
    orc_int32 biased = (orc_int32)((orc_uint32)acc + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    acc_row[k].i = acc_row[k].i + delta;                       /* loadw, addw, storew */
  }
}

/* orc_mas4_across_sub_s16_1991_op */
/*
 * Plain-C fallback for a four-input weighted sum with weights
 * (-1, 9, 9, -1) taken from four separate arrays.  For each of the ex->n
 * 16-bit elements:
 *
 *   inner        = arrays[6][i] + arrays[7][i]            (16-bit)
 *   outer        = arrays[5][i] + arrays[8][i]            (16-bit)
 *   acc          = inner * 9 - outer                      (32-bit)
 *   biased       = acc + params[24]                       (32-bit, wrapping)
 *   arrays[0][i] = arrays[4][i] - (16-bit)(biased >> params[25])
 *
 * The subtraction cannot overflow 32 bits (|inner * 9| < 2^19), but
 * params[24] is caller-supplied, so the final 32-bit add is done in
 * unsigned arithmetic to avoid signed-overflow undefined behaviour.
 */
static void
_backup_orc_mas4_across_sub_s16_1991_op (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT base = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT outer_a = (orc_union16 *)ex->arrays[5];
  const orc_union16 * ORC_RESTRICT inner_a = (orc_union16 *)ex->arrays[6];
  const orc_union16 * ORC_RESTRICT inner_b = (orc_union16 *)ex->arrays[7];
  const orc_union16 * ORC_RESTRICT outer_b = (orc_union16 *)ex->arrays[8];
  const orc_int16 nine = 9;                  /* loadpw constant */
  const orc_int32 offset = ex->params[24];   /* loadpl */
  const int shift = ex->params[25];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 inner = inner_a[k].i + inner_b[k].i;             /* addw */
    orc_int32 inner9 = inner * nine;                           /* mulswl */
    orc_int16 outer = outer_a[k].i + outer_b[k].i;             /* addw */
    orc_int32 acc = inner9 - (orc_int32)outer;                 /* convswl, subl */
    orc_int32 biased = (orc_int32)((orc_uint32)acc + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    dst[k].i = base[k].i - delta;                              /* subw, storew */
  }
}

/* orc_mas4_across_sub_s16_1991_ip */
/*
 * Plain-C fallback for a four-tap weighted sum with weights (-1, 9, 9, -1)
 * taken from consecutive elements of one array, subtracted in place.
 * For each of the ex->n 16-bit elements:
 *
 *   inner        = arrays[4][i + 1] + arrays[4][i + 2]    (16-bit)
 *   outer        = arrays[4][i]     + arrays[4][i + 3]    (16-bit)
 *   acc          = inner * 9 - outer                      (32-bit)
 *   biased       = acc + params[24]                       (32-bit, wrapping)
 *   arrays[0][i] -= (16-bit)(biased >> params[25])
 *
 * Note that arrays[4] is read up to index n + 2, so it must hold at least
 * n + 3 elements.
 *
 * The subtraction cannot overflow 32 bits (|inner * 9| < 2^19), but
 * params[24] is caller-supplied, so the final 32-bit add is done in
 * unsigned arithmetic to avoid signed-overflow undefined behaviour.
 */
static void
_backup_orc_mas4_across_sub_s16_1991_ip (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT acc_row = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT taps = (orc_union16 *)ex->arrays[4];
  const orc_int16 nine = 9;                  /* loadpw constant */
  const orc_int32 offset = ex->params[24];   /* loadpl */
  const int shift = ex->params[25];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 inner = taps[k + 1].i + taps[k + 2].i;           /* loadoffw x2, addw */
    orc_int32 inner9 = inner * nine;                           /* mulswl */
    orc_int16 outer = taps[k].i + taps[k + 3].i;               /* loadw, loadoffw, addw */
    orc_int32 acc = inner9 - (orc_int32)outer;                 /* convswl, subl */
    orc_int32 biased = (orc_int32)((orc_uint32)acc + (orc_uint32)offset); /* addl */
    orc_int16 delta = biased >> shift;                         /* shrsl, convlw */

    acc_row[k].i = acc_row[k].i - delta;                       /* loadw, subw, storew */
  }
}

/* orc_subtract_s16 */
/*
 * Plain-C fallback.  Element-wise 16-bit subtraction over ex->n elements:
 *
 *   arrays[0][i] = arrays[4][i] - arrays[5][i]
 *
 * The difference is narrowed to 16 bits when stored.
 */
static void
_backup_orc_subtract_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT minuend = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT subtrahend = (orc_union16 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    dst[k].i = minuend[k].i - subtrahend[k].i;   /* subw, storew */
  }
}

/* orc_add_s16_u8 */
/*
 * Plain-C fallback.  Adds a byte array, read as unsigned, to a 16-bit
 * array.  For each of the ex->n elements:
 *
 *   arrays[0][i] = (unsigned 8-bit)arrays[5][i] + arrays[4][i]
 *
 * The sum is narrowed to 16 bits when stored.
 */
static void
_backup_orc_add_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT words = (orc_union16 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT bytes = (orc_int8 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 widened = (orc_uint8)bytes[k];   /* loadb, convubw */

    dst[k].i = widened + words[k].i;           /* addw, storew */
  }
}

/* orc_add_s16_u8_2d */
/*
 * Plain-C fallback, two-dimensional in-place form.  For each of the
 * params[ORC_VAR_A1] rows and each of the ex->n elements in a row:
 *
 *   row0[i] += (unsigned 8-bit)row4[i]
 *
 * Row j of array 0 starts params[0] * j bytes after arrays[0]; row j of
 * array 4 starts params[4] * j bytes after arrays[4].
 */
static void
_backup_orc_add_s16_u8_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int row;

  for (row = 0; row < rows; row++) {
    orc_union16 * ORC_RESTRICT acc = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT bytes = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    int col;

    for (col = 0; col < width; col++) {
      orc_int16 widened = (orc_uint8)bytes[col];   /* loadb, convubw */

      acc[col].i = acc[col].i + widened;           /* loadw, addw, storew */
    }
  }
}

/* orc_convert_s16_u8 */
/*
 * Plain-C fallback.  Widens ex->n bytes, read as unsigned, to 16 bits:
 *
 *   arrays[0][i] = (unsigned 8-bit)arrays[4][i]
 */
static void
_backup_orc_convert_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_int8 * ORC_RESTRICT bytes = (orc_int8 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    dst[k].i = (orc_uint8)bytes[k];   /* loadb, convubw, storew */
  }
}

/* orc_convert_u8_s16 */
/*
 * Plain-C fallback.  Saturating narrow of ex->n signed 16-bit values to
 * the 0..255 range:
 *
 *   arrays[0][i] = clamp(arrays[4][i], 0, 255)
 *
 * The destination is addressed as orc_int8, so the clamped value is
 * converted to that type when stored.
 */
static void
_backup_orc_convert_u8_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 word = src[k].i;        /* loadw */

    dst[k] = ORC_CLAMP_UB(word);      /* convsuswb, storeb */
  }
}

/* orc_offsetconvert_u8_s16 */
/*
 * Plain-C fallback.  Adds 128 to each of ex->n signed 16-bit values and
 * saturates the result to the 0..255 range:
 *
 *   arrays[0][i] = clamp((16-bit)(arrays[4][i] + 128), 0, 255)
 *
 * The destination is addressed as orc_int8, so the clamped value is
 * converted to that type when stored.
 */
static void
_backup_orc_offsetconvert_u8_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 shifted = src[k].i + 128;   /* addw with constant 0x80 */

    dst[k] = ORC_CLAMP_UB(shifted);       /* convsuswb, storeb */
  }
}

/* orc_offsetconvert_s16_u8 */
/*
 * Plain-C fallback.  Widens ex->n bytes, read as unsigned, to 16 bits and
 * subtracts 128:
 *
 *   arrays[0][i] = (unsigned 8-bit)arrays[4][i] - 128
 */
static void
_backup_orc_offsetconvert_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_int8 * ORC_RESTRICT bytes = (orc_int8 *)ex->arrays[4];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 widened = (orc_uint8)bytes[k];   /* loadb, convubw */

    dst[k].i = widened - 128;                  /* subw with constant 0x80, storew */
  }
}

/* orc_subtract_s16_u8 */
/*
 * Plain-C fallback.  Subtracts a byte array, read as unsigned, from a
 * 16-bit array.  For each of the ex->n elements:
 *
 *   arrays[0][i] = arrays[4][i] - (unsigned 8-bit)arrays[5][i]
 *
 * The difference is narrowed to 16 bits when stored.
 */
static void
_backup_orc_subtract_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT words = (orc_union16 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT bytes = (orc_int8 *)ex->arrays[5];
  const int count = ex->n;
  int k;

  for (k = 0; k < count; k++) {
    orc_int16 widened = (orc_uint8)bytes[k];   /* loadb, convubw */

    dst[k].i = words[k].i - widened;           /* subw, storew */
  }
}

/* orc_multiply_and_add_s16_u8
 *
 * Backup C loop: for each of ex->n elements, multiplies the byte in
 * arrays[5] (widened as unsigned) by the 16-bit word in arrays[4], keeps the
 * low 16 bits of the product and adds it to the word already in arrays[0]. */
static void
_backup_orc_multiply_and_add_s16_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT words = (orc_union16 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT bytes = (orc_int8 *)ex->arrays[5];
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 wide;
    orc_union16 product;
    orc_union16 sum;

    /* loadb, convubw */
    wide.i = (orc_uint8)bytes[k];
    /* loadw, mullw */
    product.i = (wide.i * words[k].i) & 0xffff;
    /* loadw, addw */
    sum.i = dst[k].i + product.i;
    /* storew */
    dst[k] = sum;
  }
}

/* orc_splat_s16_ns
 *
 * Backup C loop: writes ex->params[24], converted to 16 bits, into each of
 * the ex->n words of arrays[0]. */
static void
_backup_orc_splat_s16_ns (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  orc_union16 fill;
  int k;

  /* loadpw */
  fill.i = ex->params[24];

  for (k = 0; k < count; k++) {
    /* copyw, storew */
    dst[k] = fill;
  }
}

/* orc_splat_s16_2d_4xn
 *
 * Backup C loop: fills ex->params[ORC_VAR_A1] rows of 4 words each with
 * ex->params[24] converted to 16 bits.  Row `y` starts at arrays[0] offset
 * by ex->params[0] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_splat_s16_2d_4xn (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 4;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_union16 * ORC_RESTRICT line = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    orc_union16 fill;
    int x;

    /* loadpw */
    fill.i = ex->params[24];

    for (x = 0; x < width; x++) {
      /* copyw, storew */
      line[x] = fill;
    }
  }
}

/* orc_splat_s16_2d_8xn
 *
 * Backup C loop: fills ex->params[ORC_VAR_A1] rows of 8 words each with
 * ex->params[24] converted to 16 bits.  Row `y` starts at arrays[0] offset
 * by ex->params[0] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_splat_s16_2d_8xn (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_union16 * ORC_RESTRICT line = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    orc_union16 fill;
    int x;

    /* loadpw */
    fill.i = ex->params[24];

    for (x = 0; x < width; x++) {
      /* copyw, storew */
      line[x] = fill;
    }
  }
}

/* orc_splat_s16_2d
 *
 * Backup C loop: fills ex->params[ORC_VAR_A1] rows of ex->n words each with
 * ex->params[24] converted to 16 bits.  Row `y` starts at arrays[0] offset
 * by ex->params[0] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_splat_s16_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_union16 * ORC_RESTRICT line = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    orc_union16 fill;
    int x;

    /* loadpw */
    fill.i = ex->params[24];

    for (x = 0; x < width; x++) {
      /* copyw, storew */
      line[x] = fill;
    }
  }
}

/* orc_splat_u8_ns
 *
 * Backup C loop: writes ex->params[24], converted to one byte, into each of
 * the ex->n bytes of arrays[0]. */
static void
_backup_orc_splat_u8_ns (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  /* loadpb */
  const orc_int8 fill = ex->params[24];
  int k;

  for (k = 0; k < count; k++) {
    /* copyb, storeb */
    dst[k] = fill;
  }
}

/* orc_splat_u8_2d
 *
 * Backup C loop: fills ex->params[ORC_VAR_A1] rows of ex->n bytes each with
 * ex->params[24] converted to one byte.  Row `y` starts at arrays[0] offset
 * by ex->params[0] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_splat_u8_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_int8 * ORC_RESTRICT line = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    /* loadpb */
    const orc_int8 fill = ex->params[24];
    int x;

    for (x = 0; x < width; x++) {
      /* copyb, storeb */
      line[x] = fill;
    }
  }
}

/* orc_average_u8
 *
 * Backup C loop: for each of ex->n elements, treats the bytes in arrays[4]
 * and arrays[5] as unsigned, computes (a + b + 1) >> 1 and stores the byte
 * to arrays[0]. */
static void
_backup_orc_average_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_int8 * ORC_RESTRICT lhs = (orc_int8 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT rhs = (orc_int8 *)ex->arrays[5];
  int k;

  for (k = 0; k < count; k++) {
    /* loadb x2 */
    const orc_int8 a = lhs[k];
    const orc_int8 b = rhs[k];

    /* avgub, storeb */
    dst[k] = ((orc_uint8)a + (orc_uint8)b + 1)>>1;
  }
}

/* orc_rrshift6_add_s16_2d
 *
 * Backup C loop over ex->params[ORC_VAR_A1] rows of ex->n elements.  For each
 * element: (arrays[5] word + 32) >> 6 is added to the arrays[4] word, the sum
 * is clamped with ORC_CLAMP_UB and stored as a byte to arrays[0].  Every
 * intermediate is kept in 16 bits.  Row `y` of array N is reached by
 * offsetting it by ex->params[N] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_rrshift6_add_s16_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_int8 * ORC_RESTRICT out = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    const orc_union16 * ORC_RESTRICT base = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * y);
    const orc_union16 * ORC_RESTRICT extra = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * y);
    orc_union16 rounding;
    int x;

    /* loadpw */
    rounding.i = 0x00000020; /* 32 */

    for (x = 0; x < width; x++) {
      orc_union16 rounded;
      orc_union16 scaled;
      orc_union16 total;

      /* loadw, addw */
      rounded.i = extra[x].i + rounding.i;
      /* shrsw */
      scaled.i = rounded.i >> 6;
      /* loadw, addw */
      total.i = base[x].i + scaled.i;
      /* convsuswb, storeb */
      out[x] = ORC_CLAMP_UB(total.i);
    }
  }
}

/* orc_rrshift6_sub_s16_2d
 *
 * Backup C loop over ex->params[ORC_VAR_A1] rows of ex->n elements.  For each
 * element: the arrays[1] word is replaced by (word - 8160) >> 6, and that new
 * value is then subtracted from the arrays[0] word in place.  Every
 * intermediate is kept in 16 bits.  Row `y` of array N is reached by
 * offsetting it by ex->params[N] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_rrshift6_sub_s16_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_union16 * ORC_RESTRICT accum = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    orc_union16 * ORC_RESTRICT scaled_io = ORC_PTR_OFFSET(ex->arrays[1], ex->params[1] * y);
    orc_union16 bias;
    int x;

    /* loadpw */
    bias.i = 0x00001fe0; /* 8160 */

    for (x = 0; x < width; x++) {
      orc_union16 unbiased;
      orc_union16 scaled;
      orc_union16 diff;

      /* loadw, subw */
      unbiased.i = scaled_io[x].i - bias.i;
      /* shrsw */
      scaled.i = unbiased.i >> 6;
      /* copyw, storew */
      scaled_io[x] = scaled;
      /* loadw, subw */
      diff.i = accum[x].i - scaled.i;
      /* storew */
      accum[x] = diff;
    }
  }
}

/* orc_rrshift6_s16_ip_2d
 *
 * Backup C loop over ex->params[ORC_VAR_A1] rows of ex->n words.  Each word
 * of arrays[0] is replaced in place by (word - 8160) >> 6, with the
 * subtraction kept in 16 bits.  Row `y` starts at arrays[0] offset by
 * ex->params[0] * y through ORC_PTR_OFFSET. */
static void
_backup_orc_rrshift6_s16_ip_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int y;

  for (y = 0; y < rows; y++) {
    orc_union16 * ORC_RESTRICT line = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * y);
    orc_union16 bias;
    int x;

    /* loadpw */
    bias.i = 0x00001fe0; /* 8160 */

    for (x = 0; x < width; x++) {
      orc_union16 unbiased;
      orc_union16 scaled;

      /* loadw, subw */
      unbiased.i = line[x].i - bias.i;
      /* shrsw */
      scaled.i = unbiased.i >> 6;
      /* storew */
      line[x] = scaled;
    }
  }
}

/* orc_rrshift6_s16_ip
 *
 * Backup C loop: replaces each of the ex->n words of arrays[0] in place by
 * (word - 8160) >> 6, with the subtraction kept in 16 bits. */
static void
_backup_orc_rrshift6_s16_ip (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  orc_union16 bias;
  int k;

  /* loadpw */
  bias.i = 0x00001fe0; /* 8160 */

  for (k = 0; k < count; k++) {
    orc_union16 unbiased;
    orc_union16 scaled;

    /* loadw, subw */
    unbiased.i = data[k].i - bias.i;
    /* shrsw */
    scaled.i = unbiased.i >> 6;
    /* storew */
    data[k] = scaled;
  }
}

/* orc_unpack_yuyv_y
 *
 * Backup C loop: for each of the ex->n 16-bit words in arrays[4], stores the
 * low 8 bits of the word's value as a byte to arrays[0]. */
static void
_backup_orc_unpack_yuyv_y (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadw */
    const orc_union16 word = src[k];

    /* select0wb, storeb */
    dst[k] = (orc_uint16)word.i & 0xff;
  }
}

/* orc_unpack_yuyv_u
 *
 * Backup C loop: for each of the ex->n 32-bit values in arrays[4], stores
 * bits 8..15 of the value as a byte to arrays[0]. */
static void
_backup_orc_unpack_yuyv_u (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadl */
    const orc_union32 quad = src[k];
    orc_union16 half;

    /* select0lw: low 16 bits */
    half.i = (orc_uint32)quad.i & 0xffff;
    /* select1wb: upper byte of that half, storeb */
    dst[k] = ((orc_uint16)half.i >> 8)&0xff;
  }
}

/* orc_unpack_yuyv_v
 *
 * Backup C loop: for each of the ex->n 32-bit values in arrays[4], stores
 * bits 24..31 of the value as a byte to arrays[0]. */
static void
_backup_orc_unpack_yuyv_v (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadl */
    const orc_union32 quad = src[k];
    orc_union16 half;

    /* select1lw: high 16 bits */
    half.i = ((orc_uint32)quad.i >> 16)&0xffff;
    /* select1wb: upper byte of that half, storeb */
    dst[k] = ((orc_uint16)half.i >> 8)&0xff;
  }
}

/* orc_packyuyv
 *
 * Backup C loop: builds ex->n 32-bit values in arrays[0].  For element k the
 * value is assembled, from least to most significant byte, as:
 *   low byte of the arrays[4] word, arrays[5] byte,
 *   high byte of the arrays[4] word, arrays[6] byte.
 *
 * The upper 16-bit half is widened to unsigned long (at least 32 bits)
 * before being shifted left by 16.  Shifting the promoted (signed int)
 * value instead is undefined behaviour whenever bit 15 of that half is set. */
static void
_backup_orc_packyuyv (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT words = (orc_union16 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT bytes_a = (orc_int8 *)ex->arrays[5];
  const orc_int8 * ORC_RESTRICT bytes_b = (orc_int8 *)ex->arrays[6];
  int k;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 word = words[k];
    orc_int8 lo_byte;
    orc_int8 hi_byte;
    orc_union16 lo_pair;
    orc_union16 hi_pair;
    orc_union32 packed;

    /* select0wb */
    lo_byte = (orc_uint16)word.i & 0xff;
    /* select1wb */
    hi_byte = ((orc_uint16)word.i >> 8)&0xff;
    /* loadb, mergebw */
    lo_pair.i = ((orc_uint8)lo_byte & 0x00ff) | ((orc_uint8)bytes_a[k] << 8);
    /* loadb, mergebw */
    hi_pair.i = ((orc_uint8)hi_byte & 0x00ff) | ((orc_uint8)bytes_b[k] << 8);
    /* mergewl: shift in an unsigned type so bit 31 can be set safely */
    packed.i = ((orc_uint16)lo_pair.i & 0x0000ffff) | ((unsigned long)(orc_uint16)hi_pair.i << 16);
    /* storel */
    dst[k] = packed;
  }
}

/* orc_unpack_uyvy_y
 *
 * Backup C loop: for each of the ex->n 16-bit words in arrays[4], stores
 * bits 8..15 of the word's value as a byte to arrays[0]. */
static void
_backup_orc_unpack_uyvy_y (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadw */
    const orc_union16 word = src[k];

    /* select1wb, storeb */
    dst[k] = ((orc_uint16)word.i >> 8)&0xff;
  }
}

/* orc_unpack_uyvy_u
 *
 * Backup C loop: for each of the ex->n 32-bit values in arrays[4], stores
 * bits 0..7 of the value as a byte to arrays[0]. */
static void
_backup_orc_unpack_uyvy_u (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadl */
    const orc_union32 quad = src[k];
    orc_union16 half;

    /* select0lw: low 16 bits */
    half.i = (orc_uint32)quad.i & 0xffff;
    /* select0wb: lower byte of that half, storeb */
    dst[k] = (orc_uint16)half.i & 0xff;
  }
}

/* orc_unpack_uyvy_v
 *
 * Backup C loop: for each of the ex->n 32-bit values in arrays[4], stores
 * bits 16..23 of the value as a byte to arrays[0]. */
static void
_backup_orc_unpack_uyvy_v (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadl */
    const orc_union32 quad = src[k];
    orc_union16 half;

    /* select1lw: high 16 bits */
    half.i = ((orc_uint32)quad.i >> 16)&0xffff;
    /* select0wb: lower byte of that half, storeb */
    dst[k] = (orc_uint16)half.i & 0xff;
  }
}

/* orc_interleave2_s16
 *
 * Backup C loop: for each of ex->n elements, builds a 32-bit value whose low
 * 16 bits come from the arrays[4] word and whose high 16 bits come from the
 * arrays[5] word, and stores it to arrays[0].
 *
 * The high half is widened to unsigned long (at least 32 bits) before the
 * left shift by 16.  Shifting the promoted (signed int) value instead is
 * undefined behaviour whenever bit 15 of that half is set. */
static void
_backup_orc_interleave2_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT low_src = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT high_src = (orc_union16 *)ex->arrays[5];
  int k;

  for (k = 0; k < count; k++) {
    /* loadw x2 */
    const orc_union16 low = low_src[k];
    const orc_union16 high = high_src[k];
    orc_union32 merged;

    /* mergewl: shift in an unsigned type so bit 31 can be set safely */
    merged.i = ((orc_uint16)low.i & 0x0000ffff) | ((unsigned long)(orc_uint16)high.i << 16);
    /* storel */
    dst[k] = merged;
  }
}

/* orc_interleave2_rrshift1_s16
 *
 * Backup C loop: for each of ex->n elements, computes (word + 1) >> 1 for
 * the arrays[4] word and for the arrays[5] word (each step kept in 16 bits),
 * then stores to arrays[0] a 32-bit value with the first result in the low
 * 16 bits and the second in the high 16 bits.
 *
 * The high half is widened to unsigned long (at least 32 bits) before the
 * left shift by 16.  Shifting the promoted (signed int) value instead is
 * undefined behaviour whenever bit 15 of that half is set. */
static void
_backup_orc_interleave2_rrshift1_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT low_src = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT high_src = (orc_union16 *)ex->arrays[5];
  orc_union16 one;
  int k;

  /* loadpw: rounding constant shared by both halves */
  one.i = 0x00000001;

  for (k = 0; k < count; k++) {
    orc_union16 low_sum;
    orc_union16 low;
    orc_union16 high_sum;
    orc_union16 high;
    orc_union32 merged;

    /* loadw, addw */
    low_sum.i = low_src[k].i + one.i;
    /* shrsw */
    low.i = low_sum.i >> 1;
    /* loadw, addw */
    high_sum.i = high_src[k].i + one.i;
    /* shrsw */
    high.i = high_sum.i >> 1;
    /* mergewl: shift in an unsigned type so bit 31 can be set safely */
    merged.i = ((orc_uint16)low.i & 0x0000ffff) | ((unsigned long)(orc_uint16)high.i << 16);
    /* storel */
    dst[k] = merged;
  }
}

/* orc_deinterleave2_s16
 *
 * Backup C loop: splits each of the ex->n 32-bit values in arrays[4] into
 * its low 16 bits (stored to arrays[0]) and its high 16 bits (stored to
 * arrays[1]). */
static void
_backup_orc_deinterleave2_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT low_dst = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT high_dst = (orc_union16 *)ex->arrays[1];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadl, copyl */
    const orc_union32 pair = src[k];
    orc_union16 half;

    /* select0lw, storew */
    half.i = (orc_uint32)pair.i & 0xffff;
    low_dst[k] = half;
    /* select1lw, storew */
    half.i = ((orc_uint32)pair.i >> 16)&0xffff;
    high_dst[k] = half;
  }
}

/* orc_deinterleave2_lshift1_s16
 *
 * Backup C loop: splits each of the ex->n 32-bit values in arrays[4] into
 * its low and high 16-bit halves, shifts each half left by one bit (result
 * kept in 16 bits) and stores them to arrays[0] and arrays[1] respectively.
 *
 * The shift is done on an unsigned value.  Left-shifting a negative signed
 * value is undefined behaviour in C, and the halves are negative whenever
 * their top bit is set. */
static void
_backup_orc_deinterleave2_lshift1_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT low_dst = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT high_dst = (orc_union16 *)ex->arrays[1];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    /* loadl, copyl */
    const orc_union32 pair = src[k];
    orc_union16 half;
    orc_union16 doubled;

    /* select0lw */
    half.i = (orc_uint32)pair.i & 0xffff;
    /* shlw: unsigned shift, then narrowed back to 16 bits */
    doubled.i = (orc_uint32)half.i << 1;
    /* storew */
    low_dst[k] = doubled;
    /* select1lw */
    half.i = ((orc_uint32)pair.i >> 16)&0xffff;
    /* shlw: unsigned shift, then narrowed back to 16 bits */
    doubled.i = (orc_uint32)half.i << 1;
    /* storew */
    high_dst[k] = doubled;
  }
}

/* orc_haar_deint_lshift1_split_s16
 *
 * Backup C loop over the ex->n 32-bit values in arrays[4].  For each value:
 *   a = (low 16 bits) << 1,  b = (high 16 bits) << 1
 *   d = b - a                       -> stored to arrays[1]
 *   s = a + ((d + 0 + 1) >> 1)      -> stored to arrays[0]
 * with every step kept in 16 bits.
 *
 * The left shifts are done on unsigned values.  Left-shifting a negative
 * signed value is undefined behaviour in C, and the halves are negative
 * whenever their top bit is set. */
static void
_backup_orc_haar_deint_lshift1_split_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT sum_dst = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT diff_dst = (orc_union16 *)ex->arrays[1];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadl, copyl */
    const orc_union32 pair = src[k];
    orc_union16 low;
    orc_union16 high;
    orc_union16 a;
    orc_union16 b;
    orc_union16 diff;
    orc_union16 half_diff;
    orc_union16 sum;

    /* select0lw */
    low.i = (orc_uint32)pair.i & 0xffff;
    /* select1lw */
    high.i = ((orc_uint32)pair.i >> 16)&0xffff;
    /* shlw x2: unsigned shift, then narrowed back to 16 bits */
    a.i = (orc_uint32)low.i << 1;
    b.i = (orc_uint32)high.i << 1;
    /* subw */
    diff.i = b.i - a.i;
    /* copyw, storew */
    diff_dst[k] = diff;
    /* avgsw */
    half_diff.i = (diff.i + zero.i + 1)>>1;
    /* addw */
    sum.i = a.i + half_diff.i;
    /* storew */
    sum_dst[k] = sum;
  }
}

/* orc_haar_deint_split_s16
 *
 * Backup C loop over the ex->n 32-bit values in arrays[4].  For each value:
 *   a = low 16 bits,  b = high 16 bits
 *   d = b - a                       -> stored to arrays[1]
 *   s = a + ((d + 0 + 1) >> 1)      -> stored to arrays[0]
 * with every step kept in 16 bits. */
static void
_backup_orc_haar_deint_split_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT sum_dst = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT diff_dst = (orc_union16 *)ex->arrays[1];
  const orc_union32 * ORC_RESTRICT src = (orc_union32 *)ex->arrays[4];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadl, copyl */
    const orc_union32 pair = src[k];
    orc_union16 a;
    orc_union16 b;
    orc_union16 diff;
    orc_union16 half_diff;
    orc_union16 sum;

    /* select0lw */
    a.i = (orc_uint32)pair.i & 0xffff;
    /* select1lw */
    b.i = ((orc_uint32)pair.i >> 16)&0xffff;
    /* subw */
    diff.i = b.i - a.i;
    /* copyw, storew */
    diff_dst[k] = diff;
    /* avgsw */
    half_diff.i = (diff.i + zero.i + 1)>>1;
    /* addw */
    sum.i = a.i + half_diff.i;
    /* storew */
    sum_dst[k] = sum;
  }
}

/* orc_haar_split_s16_lo
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word, stores a + (((b - a) + 0 + 1) >> 1) to arrays[0].
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_split_s16_lo (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 a = src_a[k];
    orc_union16 diff;
    orc_union16 half_diff;
    orc_union16 sum;

    /* loadw, subw */
    diff.i = src_b[k].i - a.i;
    /* avgsw */
    half_diff.i = (diff.i + zero.i + 1)>>1;
    /* addw */
    sum.i = a.i + half_diff.i;
    /* storew */
    dst[k] = sum;
  }
}

/* orc_haar_split_s16_hi
 *
 * Backup C loop: for each of ex->n elements, stores the 16-bit difference
 * (arrays[5] word - arrays[4] word) to arrays[0]. */
static void
_backup_orc_haar_split_s16_hi (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 diff;

    /* loadw x2, subw */
    diff.i = src_b[k].i - src_a[k].i;
    /* storew */
    dst[k] = diff;
  }
}

/* orc_haar_split_s16_op
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word:
 *   d = b - a                       -> stored to arrays[1]
 *   s = a + ((d + 0 + 1) >> 1)      -> stored to arrays[0]
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_split_s16_op (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT sum_dst = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT diff_dst = (orc_union16 *)ex->arrays[1];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 a = src_a[k];
    orc_union16 diff;
    orc_union16 half_diff;
    orc_union16 sum;

    /* loadw, subw */
    diff.i = src_b[k].i - a.i;
    /* copyw, storew */
    diff_dst[k] = diff;
    /* avgsw */
    half_diff.i = (diff.i + zero.i + 1)>>1;
    /* addw */
    sum.i = a.i + half_diff.i;
    /* storew */
    sum_dst[k] = sum;
  }
}

/* orc_haar_split_s16
 *
 * Backup C loop, in place on arrays[0] and arrays[1].  For each of ex->n
 * elements, with a = arrays[0] word and b = arrays[1] word:
 *   d = b - a                       -> written back to arrays[1]
 *   s = a + ((d + 0 + 1) >> 1)      -> written back to arrays[0]
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_split_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT first = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT second = (orc_union16 *)ex->arrays[1];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw (both inputs are read before anything is written) */
    const orc_union16 a = first[k];
    const orc_union16 b = second[k];
    orc_union16 diff;
    orc_union16 half_diff;
    orc_union16 sum;

    /* subw */
    diff.i = b.i - a.i;
    /* copyw, storew */
    second[k] = diff;
    /* avgsw */
    half_diff.i = (diff.i + zero.i + 1)>>1;
    /* addw */
    sum.i = a.i + half_diff.i;
    /* storew */
    first[k] = sum;
  }
}

/* orc_haar_synth_s16_lo
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word, stores a - ((b + 0 + 1) >> 1) to arrays[0].
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_synth_s16_lo (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    orc_union16 half_b;
    orc_union16 result;

    /* loadw, avgsw */
    half_b.i = (src_b[k].i + zero.i + 1)>>1;
    /* loadw, subw */
    result.i = src_a[k].i - half_b.i;
    /* storew */
    dst[k] = result;
  }
}

/* orc_haar_synth_s16_hi
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word, stores b + (a - ((b + 0 + 1) >> 1)) to arrays[0].
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_synth_s16_hi (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 b = src_b[k];
    orc_union16 half_b;
    orc_union16 lowered;
    orc_union16 result;

    /* avgsw */
    half_b.i = (b.i + zero.i + 1)>>1;
    /* loadw, subw */
    lowered.i = src_a[k].i - half_b.i;
    /* addw */
    result.i = b.i + lowered.i;
    /* storew */
    dst[k] = result;
  }
}

/* orc_haar_synth_s16_op
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word:
 *   t = a - ((b + 0 + 1) >> 1)      -> stored to arrays[0]
 *   u = b + t                       -> stored to arrays[1]
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_synth_s16_op (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT first_dst = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT second_dst = (orc_union16 *)ex->arrays[1];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 b = src_b[k];
    orc_union16 half_b;
    orc_union16 lowered;
    orc_union16 raised;

    /* avgsw */
    half_b.i = (b.i + zero.i + 1)>>1;
    /* loadw, subw */
    lowered.i = src_a[k].i - half_b.i;
    /* copyw, storew */
    first_dst[k] = lowered;
    /* addw */
    raised.i = b.i + lowered.i;
    /* storew */
    second_dst[k] = raised;
  }
}

/* orc_haar_synth_s16
 *
 * Backup C loop, in place on arrays[0] and arrays[1].  For each of ex->n
 * elements, with a = arrays[0] word and b = arrays[1] word:
 *   t = a - ((b + 0 + 1) >> 1)      -> written back to arrays[0]
 *   u = b + t                       -> written back to arrays[1]
 * Every step is kept in 16 bits. */
static void
_backup_orc_haar_synth_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT first = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT second = (orc_union16 *)ex->arrays[1];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw (both inputs are read before anything is written) */
    const orc_union16 a = first[k];
    const orc_union16 b = second[k];
    orc_union16 half_b;
    orc_union16 lowered;
    orc_union16 raised;

    /* avgsw */
    half_b.i = (b.i + zero.i + 1)>>1;
    /* subw */
    lowered.i = a.i - half_b.i;
    /* copyw, storew */
    first[k] = lowered;
    /* addw */
    raised.i = b.i + lowered.i;
    /* storew */
    second[k] = raised;
  }
}

/* orc_haar_synth_rrshift1_int_s16
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word:
 *   t  = a - ((b + 0 + 1) >> 1)
 *   u  = b + t
 *   lo = (t + 0 + 1) >> 1,  hi = (u + 0 + 1) >> 1
 * and stores to arrays[0] a 32-bit value with lo in the low 16 bits and hi
 * in the high 16 bits.  Every 16-bit step is kept in 16 bits.
 *
 * The high half is widened to unsigned long (at least 32 bits) before the
 * left shift by 16.  Shifting the promoted (signed int) value instead is
 * undefined behaviour whenever bit 15 of that half is set. */
static void
_backup_orc_haar_synth_rrshift1_int_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand shared by all three averaging steps */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 b = src_b[k];
    orc_union16 half_b;
    orc_union16 lowered;
    orc_union16 raised;
    orc_union16 low;
    orc_union16 high;
    orc_union32 merged;

    /* avgsw */
    half_b.i = (b.i + zero.i + 1)>>1;
    /* loadw, subw */
    lowered.i = src_a[k].i - half_b.i;
    /* addw */
    raised.i = b.i + lowered.i;
    /* avgsw x2 */
    low.i = (lowered.i + zero.i + 1)>>1;
    high.i = (raised.i + zero.i + 1)>>1;
    /* mergewl: shift in an unsigned type so bit 31 can be set safely */
    merged.i = ((orc_uint16)low.i & 0x0000ffff) | ((unsigned long)(orc_uint16)high.i << 16);
    /* storel */
    dst[k] = merged;
  }
}

/* orc_haar_synth_int_s16
 *
 * Backup C loop: for each of ex->n elements, with a = arrays[4] word and
 * b = arrays[5] word:
 *   t = a - ((b + 0 + 1) >> 1)
 *   u = b + t
 * and stores to arrays[0] a 32-bit value with t in the low 16 bits and u in
 * the high 16 bits.  Every 16-bit step is kept in 16 bits.
 *
 * The high half is widened to unsigned long (at least 32 bits) before the
 * left shift by 16.  Shifting the promoted (signed int) value instead is
 * undefined behaviour whenever bit 15 of that half is set. */
static void
_backup_orc_haar_synth_int_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union32 * ORC_RESTRICT dst = (orc_union32 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src_a = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT src_b = (orc_union16 *)ex->arrays[5];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    /* loadw, copyw */
    const orc_union16 b = src_b[k];
    orc_union16 half_b;
    orc_union16 lowered;
    orc_union16 raised;
    orc_union32 merged;

    /* avgsw */
    half_b.i = (b.i + zero.i + 1)>>1;
    /* loadw, subw */
    lowered.i = src_a[k].i - half_b.i;
    /* addw */
    raised.i = b.i + lowered.i;
    /* mergewl: shift in an unsigned type so bit 31 can be set safely */
    merged.i = ((orc_uint16)lowered.i & 0x0000ffff) | ((unsigned long)(orc_uint16)raised.i << 16);
    /* storel */
    dst[k] = merged;
  }
}

/* orc_haar_sub_s16
 *
 * Backup C loop: subtracts each of the ex->n words in arrays[4] from the
 * corresponding word in arrays[0], in place, keeping the result in 16 bits. */
static void
_backup_orc_haar_sub_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 diff;

    /* loadw x2, subw */
    diff.i = data[k].i - src[k].i;
    /* storew */
    data[k] = diff;
  }
}

/* orc_haar_add_half_s16
 *
 * Backup C loop: for each of ex->n elements, adds ((arrays[4] word + 0 + 1)
 * >> 1) to the arrays[0] word in place.  Every step is kept in 16 bits. */
static void
_backup_orc_haar_add_half_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    orc_union16 half;
    orc_union16 sum;

    /* loadw, avgsw */
    half.i = (src[k].i + zero.i + 1)>>1;
    /* loadw, addw */
    sum.i = data[k].i + half.i;
    /* storew */
    data[k] = sum;
  }
}

/* orc_haar_add_s16
 *
 * Backup C loop: adds each of the ex->n words in arrays[4] to the
 * corresponding word in arrays[0], in place, keeping the result in 16 bits. */
static void
_backup_orc_haar_add_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 sum;

    /* loadw x2, addw */
    sum.i = data[k].i + src[k].i;
    /* storew */
    data[k] = sum;
  }
}

/* orc_haar_sub_half_s16
 *
 * Backup C loop: for each of ex->n elements, subtracts ((arrays[4] word + 0
 * + 1) >> 1) from the arrays[0] word in place.  Every step is kept in
 * 16 bits. */
static void
_backup_orc_haar_sub_half_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 zero;
  int k;

  /* loadpw: second operand of the averaging step */
  zero.i = 0x00000000;

  for (k = 0; k < count; k++) {
    orc_union16 half;
    orc_union16 diff;

    /* loadw, avgsw */
    half.i = (src[k].i + zero.i + 1)>>1;
    /* loadw, subw */
    diff.i = data[k].i - half.i;
    /* storew */
    data[k] = diff;
  }
}

/* orc_sum_u8
 *
 * Backup C loop: adds up the ex->n bytes of arrays[4], each treated as an
 * unsigned value, and writes the 32-bit total to ex->accumulators[0].
 *
 * The running total is added in unsigned arithmetic.  A signed 32-bit add
 * that overflows is undefined behaviour; the unsigned add is well defined,
 * and narrowing the result back to 32 bits is implementation-defined rather
 * than undefined.  Totals that fit in 32 bits are unchanged. */
static void
_backup_orc_sum_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  const orc_int8 * ORC_RESTRICT src = (orc_int8 *)ex->arrays[4];
  orc_union32 total =  { 0 };
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 wide;
    orc_union32 term;

    /* loadb, convubw */
    wide.i = (orc_uint8)src[k];
    /* convuwl */
    term.i = (orc_uint16)wide.i;
    /* accl: unsigned add avoids signed-overflow UB */
    total.i = (unsigned long)total.i + (unsigned long)term.i;
  }
  ex->accumulators[0] = total.i;
}

/* orc_sum_s16
 *
 * Backup C loop: adds up the ex->n signed 16-bit words of arrays[4] and
 * writes the 32-bit total to ex->accumulators[0].
 *
 * The running total is added in unsigned arithmetic.  A signed 32-bit add
 * that overflows is undefined behaviour; the unsigned add is well defined,
 * and narrowing the result back to 32 bits is implementation-defined rather
 * than undefined.  Totals that fit in 32 bits are unchanged. */
static void
_backup_orc_sum_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union32 total =  { 0 };
  int k;

  for (k = 0; k < count; k++) {
    orc_union32 term;

    /* loadw, convswl (sign-extending) */
    term.i = src[k].i;
    /* accl: unsigned add avoids signed-overflow UB */
    total.i = (unsigned long)total.i + (unsigned long)term.i;
  }
  ex->accumulators[0] = total.i;
}

/* orc_sum_square_diff_u8
 *
 * Backup C implementation: for each of the ex->n positions, takes the
 * bytes of ex->arrays[4] and ex->arrays[5] as unsigned values, squares
 * their difference (kept in the low 16 bits) and accumulates it.  The
 * 32-bit total is written to ex->accumulators[0].
 */
static void
_backup_orc_sum_square_diff_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  const orc_int8 * ORC_RESTRICT src_a = (orc_int8 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT src_b = (orc_int8 *)ex->arrays[5];
  orc_union32 total = { 0 };
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 a;
    orc_union16 b;
    orc_union16 diff;
    orc_union16 square;

    /* convubw on both inputs */
    a.i = (orc_uint8)src_a[k];
    b.i = (orc_uint8)src_b[k];
    /* subw */
    diff.i = a.i - b.i;
    /* mullw: low 16 bits of the product */
    square.i = (diff.i * diff.i) & 0xffff;
    /* convuwl + accl: reinterpret as unsigned before accumulating */
    total.i += (orc_uint16)square.i;
  }
  ex->accumulators[0] = total.i;
}

/* orc_dequantise_s16_2d_4xn
 *
 * Backup C implementation over a 2-D block that is 4 elements wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers are derived from
 * ex->arrays[0] (destination) and ex->arrays[4] (source) with
 * ORC_PTR_OFFSET and the per-row offsets ex->params[0] / ex->params[4].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   out = (((|x| * ex->params[24]) + ex->params[25]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_s16_2d_4xn (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 4;
  const int rows = ex->params[ORC_VAR_A1];
  orc_union16 * ORC_RESTRICT dst;
  const orc_union16 * ORC_RESTRICT src;
  orc_union16 factor;
  orc_union16 offset;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);

    /* loadpw: parameters are narrowed to 16 bits */
    factor.i = ex->params[24];
    offset.i = ex->params[25];

    for (col = 0; col < width; col++) {
      orc_union16 coeff;
      orc_union16 sign;
      orc_union16 magnitude;
      orc_union16 scaled;
      orc_union16 biased;
      orc_union16 shifted;
      orc_union16 out;

      coeff = src[col];
      /* signw */
      sign.i = ORC_CLAMP(coeff.i,-1,1);
      /* absw */
      magnitude.i = ORC_ABS(coeff.i);
      /* mullw */
      scaled.i = (magnitude.i * factor.i) & 0xffff;
      /* addw */
      biased.i = scaled.i + offset.i;
      /* shrsw */
      shifted.i = biased.i >> 2;
      /* mullw: re-apply the sign */
      out.i = (shifted.i * sign.i) & 0xffff;
      dst[col] = out;
    }
  }
}

/* orc_dequantise_s16_2d_8xn
 *
 * Backup C implementation over a 2-D block that is 8 elements wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers are derived from
 * ex->arrays[0] (destination) and ex->arrays[4] (source) with
 * ORC_PTR_OFFSET and the per-row offsets ex->params[0] / ex->params[4].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   out = (((|x| * ex->params[24]) + ex->params[25]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_s16_2d_8xn (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = ex->params[ORC_VAR_A1];
  orc_union16 * ORC_RESTRICT dst;
  const orc_union16 * ORC_RESTRICT src;
  orc_union16 factor;
  orc_union16 offset;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);

    /* loadpw: parameters are narrowed to 16 bits */
    factor.i = ex->params[24];
    offset.i = ex->params[25];

    for (col = 0; col < width; col++) {
      orc_union16 coeff;
      orc_union16 sign;
      orc_union16 magnitude;
      orc_union16 scaled;
      orc_union16 biased;
      orc_union16 shifted;
      orc_union16 out;

      coeff = src[col];
      /* signw */
      sign.i = ORC_CLAMP(coeff.i,-1,1);
      /* absw */
      magnitude.i = ORC_ABS(coeff.i);
      /* mullw */
      scaled.i = (magnitude.i * factor.i) & 0xffff;
      /* addw */
      biased.i = scaled.i + offset.i;
      /* shrsw */
      shifted.i = biased.i >> 2;
      /* mullw: re-apply the sign */
      out.i = (shifted.i * sign.i) & 0xffff;
      dst[col] = out;
    }
  }
}

/* orc_dequantise_s16_ip_2d_8xn
 *
 * Backup C implementation, in place, over a 2-D block that is 8
 * elements wide and ex->params[ORC_VAR_A1] rows high.  Each row pointer
 * is derived from ex->arrays[0] with ORC_PTR_OFFSET and the per-row
 * offset ex->params[0].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   x = (((|x| * ex->params[24]) + ex->params[25]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_s16_ip_2d_8xn (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = ex->params[ORC_VAR_A1];
  orc_union16 * ORC_RESTRICT data;
  orc_union16 factor;
  orc_union16 offset;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    data = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);

    /* loadpw: parameters are narrowed to 16 bits */
    factor.i = ex->params[24];
    offset.i = ex->params[25];

    for (col = 0; col < width; col++) {
      orc_union16 coeff;
      orc_union16 sign;
      orc_union16 magnitude;
      orc_union16 scaled;
      orc_union16 biased;
      orc_union16 shifted;
      orc_union16 out;

      coeff = data[col];
      /* signw */
      sign.i = ORC_CLAMP(coeff.i,-1,1);
      /* absw */
      magnitude.i = ORC_ABS(coeff.i);
      /* mullw */
      scaled.i = (magnitude.i * factor.i) & 0xffff;
      /* addw */
      biased.i = scaled.i + offset.i;
      /* shrsw */
      shifted.i = biased.i >> 2;
      /* mullw: re-apply the sign */
      out.i = (shifted.i * sign.i) & 0xffff;
      data[col] = out;
    }
  }
}

/* orc_dequantise_s16_ip_2d
 *
 * Backup C implementation, in place, over a 2-D block that is ex->n
 * elements wide and ex->params[ORC_VAR_A1] rows high.  Each row pointer
 * is derived from ex->arrays[0] with ORC_PTR_OFFSET and the per-row
 * offset ex->params[0].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   x = (((|x| * ex->params[24]) + ex->params[25]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_s16_ip_2d (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  orc_union16 * ORC_RESTRICT data;
  orc_union16 factor;
  orc_union16 offset;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    data = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);

    /* loadpw: parameters are narrowed to 16 bits */
    factor.i = ex->params[24];
    offset.i = ex->params[25];

    for (col = 0; col < width; col++) {
      orc_union16 coeff;
      orc_union16 sign;
      orc_union16 magnitude;
      orc_union16 scaled;
      orc_union16 biased;
      orc_union16 shifted;
      orc_union16 out;

      coeff = data[col];
      /* signw */
      sign.i = ORC_CLAMP(coeff.i,-1,1);
      /* absw */
      magnitude.i = ORC_ABS(coeff.i);
      /* mullw */
      scaled.i = (magnitude.i * factor.i) & 0xffff;
      /* addw */
      biased.i = scaled.i + offset.i;
      /* shrsw */
      shifted.i = biased.i >> 2;
      /* mullw: re-apply the sign */
      out.i = (shifted.i * sign.i) & 0xffff;
      data[col] = out;
    }
  }
}

/* orc_dequantise_s16_ip
 *
 * Backup C implementation, in place, over the ex->n 16-bit elements of
 * ex->arrays[0].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   x = (((|x| * ex->params[24]) + ex->params[25]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_s16_ip (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  orc_union16 factor;
  orc_union16 offset;
  int k;

  /* loadpw: parameters are narrowed to 16 bits */
  factor.i = ex->params[24];
  offset.i = ex->params[25];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 biased;
    orc_union16 shifted;
    orc_union16 out;

    coeff = data[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* mullw */
    scaled.i = (magnitude.i * factor.i) & 0xffff;
    /* addw */
    biased.i = scaled.i + offset.i;
    /* shrsw */
    shifted.i = biased.i >> 2;
    /* mullw: re-apply the sign */
    out.i = (shifted.i * sign.i) & 0xffff;
    data[k] = out;
  }
}

/* orc_dequantise_s16
 *
 * Backup C implementation: reads ex->n 16-bit elements from
 * ex->arrays[4] and writes the results to ex->arrays[0].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   out = (((|x| * ex->params[24]) + ex->params[25]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 factor;
  orc_union16 offset;
  int k;

  /* loadpw: parameters are narrowed to 16 bits */
  factor.i = ex->params[24];
  offset.i = ex->params[25];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 biased;
    orc_union16 shifted;
    orc_union16 out;

    coeff = src[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* mullw */
    scaled.i = (magnitude.i * factor.i) & 0xffff;
    /* addw */
    biased.i = scaled.i + offset.i;
    /* shrsw */
    shifted.i = biased.i >> 2;
    /* mullw: re-apply the sign */
    out.i = (shifted.i * sign.i) & 0xffff;
    dst[k] = out;
  }
}

/* orc_dequantise_var_s16_ip
 *
 * Backup C implementation, in place, over the ex->n 16-bit elements of
 * ex->arrays[0].  Unlike the fixed-parameter variants, the multiplier
 * and the additive term are read per element from ex->arrays[4] and
 * ex->arrays[5].
 *
 * Per element x, with every step narrowed to 16 bits:
 *   x = (((|x| * arrays[4][i]) + arrays[5][i]) >> 2) * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_dequantise_var_s16_ip (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT data = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT factors = (orc_union16 *)ex->arrays[4];
  const orc_union16 * ORC_RESTRICT offsets = (orc_union16 *)ex->arrays[5];
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 factor;
    orc_union16 scaled;
    orc_union16 offset;
    orc_union16 biased;
    orc_union16 shifted;
    orc_union16 out;

    coeff = data[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* mullw with the per-element multiplier */
    factor = factors[k];
    scaled.i = (magnitude.i * factor.i) & 0xffff;
    /* addw with the per-element additive term */
    offset = offsets[k];
    biased.i = scaled.i + offset.i;
    /* shrsw */
    shifted.i = biased.i >> 2;
    /* mullw: re-apply the sign */
    out.i = (shifted.i * sign.i) & 0xffff;
    data[k] = out;
  }
}

/* orc_quantise1_s16
 *
 * Backup C implementation: reads ex->n 16-bit elements from
 * ex->arrays[4] and writes the results to ex->arrays[0].
 *
 * Per element x, with every intermediate narrowed to 16 bits:
 *   t   = (|x| << 2) - ex->params[25]
 *   q   = (high 16 bits of (uint16)t * (uint16)ex->params[24])
 *           >> ex->params[26]
 *   out = q * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_quantise1_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 offset;
  orc_union16 multiplier;
  int k;

  /* loadpw: parameters are narrowed to 16 bits */
  offset.i = ex->params[25];
  multiplier.i = ex->params[24];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 reduced;
    orc_union16 high;
    orc_union16 level;
    orc_union16 out;

    coeff = src[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* shlw: shift the unsigned bit pattern.  For coeff == -32768 the
     * 16-bit magnitude can wrap to a negative value, and left-shifting a
     * negative signed value is undefined behaviour in C.  The low 16 bits
     * of the result are unchanged for every other input. */
    scaled.i = ((orc_uint16)magnitude.i) << 2;
    /* subw */
    reduced.i = scaled.i - offset.i;
    /* mulhuw: high half of the unsigned 16x16 product */
    high.i = ((orc_uint32)((orc_uint16)reduced.i) * (orc_uint32)((orc_uint16)multiplier.i)) >> 16;
    /* shruw: the shift count is re-read from ex->params[26] each time */
    level.i = ((orc_uint16)high.i) >> ex->params[26];
    /* mullw: re-apply the sign */
    out.i = (level.i * sign.i) & 0xffff;
    dst[k] = out;
  }
}

/* orc_quantise2_s16
 *
 * Backup C implementation: reads ex->n 16-bit elements from
 * ex->arrays[4] and writes the results to ex->arrays[0].
 *
 * Per element x, with every intermediate narrowed to 16 bits:
 *   t   = (|x| << 2) - ex->params[25]
 *   q   = (uint16)t >> ex->params[24]
 *   out = q * sign(x)
 * where sign(x) is x clamped to [-1, 1].
 */
static void
_backup_orc_quantise2_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT dst = (orc_union16 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 offset;
  int k;

  /* loadpw: parameter is narrowed to 16 bits */
  offset.i = ex->params[25];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 reduced;
    orc_union16 level;
    orc_union16 out;

    coeff = src[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* shlw: shift the unsigned bit pattern.  For coeff == -32768 the
     * 16-bit magnitude can wrap to a negative value, and left-shifting a
     * negative signed value is undefined behaviour in C.  The low 16 bits
     * of the result are unchanged for every other input. */
    scaled.i = ((orc_uint16)magnitude.i) << 2;
    /* subw */
    reduced.i = scaled.i - offset.i;
    /* shruw: the shift count is re-read from ex->params[24] each time */
    level.i = ((orc_uint16)reduced.i) >> ex->params[24];
    /* mullw: re-apply the sign */
    out.i = (level.i * sign.i) & 0xffff;
    dst[k] = out;
  }
}

/* orc_quantdequant1_s16
 *
 * Backup C implementation over ex->n 16-bit elements.  ex->arrays[1] is
 * both read and overwritten; ex->arrays[0] is written only.
 *
 * Per element x = arrays[1][i], every intermediate narrowed to 16 bits:
 *   t = (|x| << 2) - ex->params[25]
 *   q = (high 16 bits of (uint16)t * (uint16)ex->params[24])
 *         >> ex->params[26]
 *   arrays[0][i] = q * sign(x)
 *   arrays[1][i] = (((q * ex->params[27]) + ex->params[28]) >> 2)
 *                    * sign(arrays[0][i])
 * where sign(v) is v clamped to [-1, 1].  Judging by the kernel name
 * this is a quantise step followed by the matching dequantise step.
 */
static void
_backup_orc_quantdequant1_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT levels = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT coeffs = (orc_union16 *)ex->arrays[1];
  orc_union16 offset;
  orc_union16 multiplier;
  orc_union16 dq_factor;
  orc_union16 dq_offset;
  int k;

  /* loadpw: parameters are narrowed to 16 bits */
  offset.i = ex->params[25];
  multiplier.i = ex->params[24];
  dq_factor.i = ex->params[27];
  dq_offset.i = ex->params[28];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 reduced;
    orc_union16 high;
    orc_union16 level;
    orc_union16 quantised;
    orc_union16 quantised_sign;
    orc_union16 rescaled;
    orc_union16 biased;
    orc_union16 shifted;
    orc_union16 restored;

    coeff = coeffs[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* shlw: shift the unsigned bit pattern.  For coeff == -32768 the
     * 16-bit magnitude can wrap to a negative value, and left-shifting a
     * negative signed value is undefined behaviour in C.  The low 16 bits
     * of the result are unchanged for every other input. */
    scaled.i = ((orc_uint16)magnitude.i) << 2;
    /* subw */
    reduced.i = scaled.i - offset.i;
    /* mulhuw: high half of the unsigned 16x16 product */
    high.i = ((orc_uint32)((orc_uint16)reduced.i) * (orc_uint32)((orc_uint16)multiplier.i)) >> 16;
    /* shruw: the shift count is re-read from ex->params[26] each time */
    level.i = ((orc_uint16)high.i) >> ex->params[26];
    /* mullw: signed result of the first stage */
    quantised.i = (level.i * sign.i) & 0xffff;
    levels[k] = quantised;

    /* second stage works from the unsigned level and the stored sign */
    quantised_sign.i = ORC_CLAMP(quantised.i,-1,1);
    /* mullw */
    rescaled.i = (level.i * dq_factor.i) & 0xffff;
    /* addw */
    biased.i = rescaled.i + dq_offset.i;
    /* shrsw */
    shifted.i = biased.i >> 2;
    /* mullw: re-apply the sign */
    restored.i = (shifted.i * quantised_sign.i) & 0xffff;
    coeffs[k] = restored;
  }
}

/* orc_quantdequant3_s16
 *
 * Backup C implementation over ex->n 16-bit elements.  ex->arrays[1] is
 * both read and overwritten; ex->arrays[0] is written only.
 *
 * Per element x = arrays[1][i]:
 *   t = (|x| << 2) - ex->params[25]                    (16-bit)
 *   w = (uint16)t * (uint16)ex->params[24]
 *         + ex->params[29]                             (32-bit, wrapping)
 *   q = low 16 bits of (w >> ex->params[26])
 *   arrays[0][i] = q * sign(x)
 *   arrays[1][i] = (((q * ex->params[27]) + ex->params[28]) >> 2)
 *                    * sign(arrays[0][i])
 * where sign(v) is v clamped to [-1, 1].  Judging by the kernel name
 * this is a quantise step followed by the matching dequantise step.
 */
static void
_backup_orc_quantdequant3_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT levels = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT coeffs = (orc_union16 *)ex->arrays[1];
  orc_union16 offset;
  orc_union16 multiplier;
  orc_union32 round_add;
  orc_union16 dq_factor;
  orc_union16 dq_offset;
  int k;

  /* loadpw / loadpl */
  offset.i = ex->params[25];
  multiplier.i = ex->params[24];
  round_add.i = ex->params[29];
  dq_factor.i = ex->params[27];
  dq_offset.i = ex->params[28];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 reduced;
    orc_uint32 product;
    orc_uint32 rounded;
    orc_union32 wide_level;
    orc_union16 level;
    orc_union16 quantised;
    orc_union16 quantised_sign;
    orc_union16 rescaled;
    orc_union16 biased;
    orc_union16 shifted;
    orc_union16 restored;

    coeff = coeffs[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* shlw: shift the unsigned bit pattern.  For coeff == -32768 the
     * 16-bit magnitude can wrap to a negative value, and left-shifting a
     * negative signed value is undefined behaviour in C.  The low 16 bits
     * of the result are unchanged for every other input. */
    scaled.i = ((orc_uint16)magnitude.i) << 2;
    /* subw */
    reduced.i = scaled.i - offset.i;
    /* muluwl: widen to 32-bit unsigned before multiplying.  Two
     * orc_uint16 operands would otherwise be promoted to signed int, and
     * large products (e.g. 65535 * 65535) overflow it. */
    product = (orc_uint32)((orc_uint16)reduced.i) * (orc_uint32)((orc_uint16)multiplier.i);
    /* addl: done in unsigned arithmetic so it wraps modulo 2^32 instead
     * of overflowing a signed 32-bit integer. */
    rounded = product + (orc_uint32)round_add.i;
    /* shrul: the shift count is re-read from ex->params[26] each time */
    wide_level.i = rounded >> ex->params[26];
    /* convlw: keep the low 16 bits */
    level.i = wide_level.i;
    /* mullw: signed result of the first stage */
    quantised.i = (level.i * sign.i) & 0xffff;
    levels[k] = quantised;

    /* second stage works from the 16-bit level and the stored sign */
    quantised_sign.i = ORC_CLAMP(quantised.i,-1,1);
    /* mullw */
    rescaled.i = (level.i * dq_factor.i) & 0xffff;
    /* addw */
    biased.i = rescaled.i + dq_offset.i;
    /* shrsw */
    shifted.i = biased.i >> 2;
    /* mullw: re-apply the sign */
    restored.i = (shifted.i * quantised_sign.i) & 0xffff;
    coeffs[k] = restored;
  }
}

/* orc_quantdequant2_s16
 *
 * Backup C implementation over ex->n 16-bit elements.  ex->arrays[1] is
 * both read and overwritten; ex->arrays[0] is written only.
 *
 * Per element x = arrays[1][i], every intermediate narrowed to 16 bits:
 *   t = (|x| << 2) - ex->params[25]
 *   q = (uint16)t >> ex->params[24]
 *   arrays[0][i] = q * sign(x)
 *   arrays[1][i] = (((q * ex->params[26]) + ex->params[27]) >> 2)
 *                    * sign(arrays[0][i])
 * where sign(v) is v clamped to [-1, 1].  Judging by the kernel name
 * this is a quantise step followed by the matching dequantise step.
 */
static void
_backup_orc_quantdequant2_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_union16 * ORC_RESTRICT levels = (orc_union16 *)ex->arrays[0];
  orc_union16 * ORC_RESTRICT coeffs = (orc_union16 *)ex->arrays[1];
  orc_union16 offset;
  orc_union16 dq_factor;
  orc_union16 dq_offset;
  int k;

  /* loadpw: parameters are narrowed to 16 bits */
  offset.i = ex->params[25];
  dq_factor.i = ex->params[26];
  dq_offset.i = ex->params[27];

  for (k = 0; k < count; k++) {
    orc_union16 coeff;
    orc_union16 sign;
    orc_union16 magnitude;
    orc_union16 scaled;
    orc_union16 reduced;
    orc_union16 level;
    orc_union16 quantised;
    orc_union16 quantised_sign;
    orc_union16 rescaled;
    orc_union16 biased;
    orc_union16 shifted;
    orc_union16 restored;

    coeff = coeffs[k];
    /* signw */
    sign.i = ORC_CLAMP(coeff.i,-1,1);
    /* absw */
    magnitude.i = ORC_ABS(coeff.i);
    /* shlw: shift the unsigned bit pattern.  For coeff == -32768 the
     * 16-bit magnitude can wrap to a negative value, and left-shifting a
     * negative signed value is undefined behaviour in C.  The low 16 bits
     * of the result are unchanged for every other input. */
    scaled.i = ((orc_uint16)magnitude.i) << 2;
    /* subw */
    reduced.i = scaled.i - offset.i;
    /* shruw: the shift count is re-read from ex->params[24] each time */
    level.i = ((orc_uint16)reduced.i) >> ex->params[24];
    /* mullw: signed result of the first stage */
    quantised.i = (level.i * sign.i) & 0xffff;
    levels[k] = quantised;

    /* second stage works from the unsigned level and the stored sign */
    quantised_sign.i = ORC_CLAMP(quantised.i,-1,1);
    /* mullw */
    rescaled.i = (level.i * dq_factor.i) & 0xffff;
    /* addw */
    biased.i = rescaled.i + dq_offset.i;
    /* shrsw */
    shifted.i = biased.i >> 2;
    /* mullw: re-apply the sign */
    restored.i = (shifted.i * quantised_sign.i) & 0xffff;
    coeffs[k] = restored;
  }
}

/* orc_downsample_vert_u8
 *
 * Backup C implementation: combines four byte arrays element by
 * element, ex->n times, treating every byte as unsigned:
 *
 *   out = (6 * (arrays[4][i] + arrays[7][i])
 *          + 26 * (arrays[5][i] + arrays[6][i]) + 32) >> 6
 *
 * Intermediates are held in 16 bits; the result is stored as a byte in
 * ex->arrays[0].
 */
static void
_backup_orc_downsample_vert_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_int8 * ORC_RESTRICT line0 = (orc_int8 *)ex->arrays[4];
  const orc_int8 * ORC_RESTRICT line1 = (orc_int8 *)ex->arrays[5];
  const orc_int8 * ORC_RESTRICT line2 = (orc_int8 *)ex->arrays[6];
  const orc_int8 * ORC_RESTRICT line3 = (orc_int8 *)ex->arrays[7];
  orc_union16 outer_weight;
  orc_union16 inner_weight;
  orc_union16 rounding;
  int k;

  /* loadpw constants */
  outer_weight.i = 0x00000006;
  inner_weight.i = 0x0000001a;
  rounding.i = 0x00000020;

  for (k = 0; k < count; k++) {
    orc_union16 outer_sum;
    orc_union16 outer_term;
    orc_union16 inner_sum;
    orc_union16 inner_term;
    orc_union16 blended;
    orc_union16 rounded;
    orc_union16 scaled;

    /* convubw x2, addw: first and last inputs */
    outer_sum.i = (orc_uint8)line0[k] + (orc_uint8)line3[k];
    /* mullw */
    outer_term.i = (outer_sum.i * outer_weight.i) & 0xffff;
    /* convubw x2, addw: the two middle inputs */
    inner_sum.i = (orc_uint8)line1[k] + (orc_uint8)line2[k];
    /* mullw */
    inner_term.i = (inner_sum.i * inner_weight.i) & 0xffff;
    /* addw, addw */
    blended.i = inner_term.i + outer_term.i;
    rounded.i = blended.i + rounding.i;
    /* shruw */
    scaled.i = ((orc_uint16)rounded.i) >> 6;
    /* convwb + storeb */
    dst[k] = scaled.i;
  }
}

/* orc_downsample_horiz_u8
 *
 * Backup C implementation: produces ex->n output bytes in ex->arrays[0]
 * from 16-bit words in ex->arrays[4].  For output i it uses bits 8..15
 * of word i, both bytes of word i + 1 and bits 0..7 of word i + 2, all
 * as unsigned bytes:
 *
 *   out = (6 * (hi(w[i]) + lo(w[i+2]))
 *          + 26 * (hi(w[i+1]) + lo(w[i+1])) + 32) >> 6
 *
 * Note that the loop reads source words up to index n + 1.
 */
static void
_backup_orc_downsample_horiz_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  orc_int8 * ORC_RESTRICT dst = (orc_int8 *)ex->arrays[0];
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 outer_weight;
  orc_union16 inner_weight;
  orc_union16 rounding;
  int k;

  /* loadpw constants */
  outer_weight.i = 0x00000006;
  inner_weight.i = 0x0000001a;
  rounding.i = 0x00000020;

  for (k = 0; k < count; k++) {
    orc_union16 first;
    orc_union16 middle;
    orc_union16 last;
    orc_int8 first_hi;
    orc_int8 last_lo;
    orc_int8 middle_hi;
    orc_int8 middle_lo;
    orc_union16 outer_sum;
    orc_union16 outer_term;
    orc_union16 inner_sum;
    orc_union16 inner_term;
    orc_union16 blended;
    orc_union16 rounded;
    orc_union16 scaled;

    /* loadw + select1wb */
    first = src[k];
    first_hi = ((orc_uint16)first.i >> 8)&0xff;
    /* loadoffw + select0wb */
    last = src[k + 2];
    last_lo = (orc_uint16)last.i & 0xff;
    /* convubw x2, addw, mullw */
    outer_sum.i = (orc_uint8)first_hi + (orc_uint8)last_lo;
    outer_term.i = (outer_sum.i * outer_weight.i) & 0xffff;
    /* loadoffw + splitwb */
    middle = src[k + 1];
    middle_hi = (middle.i >> 8) & 0xff;
    middle_lo = middle.i & 0xff;
    /* convubw x2, addw, mullw */
    inner_sum.i = (orc_uint8)middle_hi + (orc_uint8)middle_lo;
    inner_term.i = (inner_sum.i * inner_weight.i) & 0xffff;
    /* addw, addw */
    blended.i = outer_term.i + inner_term.i;
    rounded.i = blended.i + rounding.i;
    /* shruw */
    scaled.i = ((orc_uint16)rounded.i) >> 6;
    /* convwb + storeb */
    dst[k] = scaled.i;
  }
}

/* orc_stats_moment_s16
 *
 * Backup C implementation: for each of the ex->n 16-bit elements x of
 * ex->arrays[4], accumulates max(|x| - 2, 0) (computed in 16 bits) into
 * a 32-bit total, which is written to ex->accumulators[0].
 */
static void
_backup_orc_stats_moment_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union32 total = { 0 };
  orc_union16 threshold;
  orc_union16 lower;
  int k;

  /* loadpw constants */
  threshold.i = 0x00000002;
  lower.i = 0x00000000;

  for (k = 0; k < count; k++) {
    orc_union16 sample;
    orc_union16 magnitude;
    orc_union16 excess;
    orc_union16 clipped;

    sample = src[k];
    /* absw */
    magnitude.i = ORC_ABS(sample.i);
    /* subw */
    excess.i = magnitude.i - threshold.i;
    /* maxsw */
    clipped.i = ORC_MAX(excess.i, lower.i);
    /* convuwl + accl */
    total.i += (orc_uint16)clipped.i;
  }
  ex->accumulators[0] = total.i;
}

/* orc_stats_above_s16
 *
 * Backup C implementation: for each of the ex->n 16-bit elements x of
 * ex->arrays[4], accumulates min(max(|x| - 1, 0), 1) (computed in 16
 * bits) into a 32-bit total, which is written to ex->accumulators[0].
 */
static void
_backup_orc_stats_above_s16 (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union32 total = { 0 };
  orc_union16 threshold;
  orc_union16 lower;
  orc_union16 upper;
  int k;

  /* loadpw constants */
  threshold.i = 0x00000001;
  lower.i = 0x00000000;
  upper.i = 0x00000001;

  for (k = 0; k < count; k++) {
    orc_union16 sample;
    orc_union16 magnitude;
    orc_union16 excess;
    orc_union16 clipped;
    orc_union16 flag;

    sample = src[k];
    /* absw */
    magnitude.i = ORC_ABS(sample.i);
    /* subw */
    excess.i = magnitude.i - threshold.i;
    /* maxsw */
    clipped.i = ORC_MAX(excess.i, lower.i);
    /* minsw */
    flag.i = ORC_MIN(clipped.i, upper.i);
    /* convuwl + accl */
    total.i += (orc_uint16)flag.i;
  }
  ex->accumulators[0] = total.i;
}

/* orc_accw
 *
 * Backup C implementation: accumulates |x| for each of the ex->n 16-bit
 * elements of ex->arrays[4] in a 16-bit accumulator, then writes the
 * accumulator masked with 0xffff to ex->accumulators[0].
 */
static void
_backup_orc_accw (OrcExecutor * ORC_RESTRICT ex)
{
  const int count = ex->n;
  const orc_union16 * ORC_RESTRICT src = (orc_union16 *)ex->arrays[4];
  orc_union16 total = { 0 };
  int k;

  for (k = 0; k < count; k++) {
    orc_union16 sample;
    orc_union16 magnitude;

    sample = src[k];
    /* absw */
    magnitude.i = ORC_ABS(sample.i);
    /* accw: the running sum is narrowed to 16 bits every step */
    total.i = total.i + magnitude.i;
  }
  ex->accumulators[0] = (total.i & 0xffff);
}

/* orc_avg2_8xn_u8
 *
 * Backup C implementation over a 2-D block that is 8 bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination), ex->arrays[4] and ex->arrays[5]
 * (sources) via ORC_PTR_OFFSET with the per-row offsets ex->params[0],
 * ex->params[4] and ex->params[5].
 *
 * Each output byte is the rounded-up average (a + b + 1) >> 1 of the
 * two source bytes taken as unsigned values.
 */
static void
_backup_orc_avg2_8xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src_a;
  const orc_int8 * ORC_RESTRICT src_b;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src_a = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src_b = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* avgub + storeb */
      dst[col] = ((orc_uint8)src_a[col] + (orc_uint8)src_b[col] + 1)>>1;
    }
  }
}

/* orc_avg2_12xn_u8
 *
 * Backup C implementation over a 2-D block that is 12 bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination), ex->arrays[4] and ex->arrays[5]
 * (sources) via ORC_PTR_OFFSET with the per-row offsets ex->params[0],
 * ex->params[4] and ex->params[5].
 *
 * Each output byte is the rounded-up average (a + b + 1) >> 1 of the
 * two source bytes taken as unsigned values.
 */
static void
_backup_orc_avg2_12xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 12;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src_a;
  const orc_int8 * ORC_RESTRICT src_b;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src_a = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src_b = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* avgub + storeb */
      dst[col] = ((orc_uint8)src_a[col] + (orc_uint8)src_b[col] + 1)>>1;
    }
  }
}

/* orc_avg2_16xn_u8
 *
 * Backup C implementation over a 2-D block that is 16 bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination), ex->arrays[4] and ex->arrays[5]
 * (sources) via ORC_PTR_OFFSET with the per-row offsets ex->params[0],
 * ex->params[4] and ex->params[5].
 *
 * Each output byte is the rounded-up average (a + b + 1) >> 1 of the
 * two source bytes taken as unsigned values.
 */
static void
_backup_orc_avg2_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 16;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src_a;
  const orc_int8 * ORC_RESTRICT src_b;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src_a = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src_b = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* avgub + storeb */
      dst[col] = ((orc_uint8)src_a[col] + (orc_uint8)src_b[col] + 1)>>1;
    }
  }
}

/* orc_avg2_32xn_u8
 *
 * Backup C implementation over a 2-D block that is 32 bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination), ex->arrays[4] and ex->arrays[5]
 * (sources) via ORC_PTR_OFFSET with the per-row offsets ex->params[0],
 * ex->params[4] and ex->params[5].
 *
 * Each output byte is the rounded-up average (a + b + 1) >> 1 of the
 * two source bytes taken as unsigned values.
 */
static void
_backup_orc_avg2_32xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 32;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src_a;
  const orc_int8 * ORC_RESTRICT src_b;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src_a = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src_b = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* avgub + storeb */
      dst[col] = ((orc_uint8)src_a[col] + (orc_uint8)src_b[col] + 1)>>1;
    }
  }
}

/* orc_avg2_nxm_u8
 *
 * Backup C implementation over a 2-D block that is ex->n bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination), ex->arrays[4] and ex->arrays[5]
 * (sources) via ORC_PTR_OFFSET with the per-row offsets ex->params[0],
 * ex->params[4] and ex->params[5].
 *
 * Each output byte is the rounded-up average (a + b + 1) >> 1 of the
 * two source bytes taken as unsigned values.
 */
static void
_backup_orc_avg2_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src_a;
  const orc_int8 * ORC_RESTRICT src_b;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src_a = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src_b = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* avgub + storeb */
      dst[col] = ((orc_uint8)src_a[col] + (orc_uint8)src_b[col] + 1)>>1;
    }
  }
}

/* orc_combine4_8xn_u8
 *
 * Backup C implementation over a 2-D block that is 8 bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination) and ex->arrays[4..7] (sources) via
 * ORC_PTR_OFFSET with the per-row offsets ex->params[0] and
 * ex->params[4..7].
 *
 * Each output byte is
 *   ORC_CLAMP_UB(s4 * p24 + s5 * p25 + s6 * p26 + s7 * p27 + 8)
 * where sN is the unsigned source byte from ex->arrays[N], pN is
 * ex->params[N] narrowed to 16 bits, and every product and partial sum
 * is narrowed to 16 bits.
 *
 * NOTE(review): a rounding constant of 8 is added but no right shift
 * follows before the clamp -- confirm against the .orc source.
 */
static void
_backup_orc_combine4_8xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src0;
  const orc_int8 * ORC_RESTRICT src1;
  const orc_int8 * ORC_RESTRICT src2;
  const orc_int8 * ORC_RESTRICT src3;
  orc_union16 weight0;
  orc_union16 weight1;
  orc_union16 weight2;
  orc_union16 weight3;
  orc_union16 rounding;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src0 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src1 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    src2 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * row);
    src3 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * row);

    /* loadpw: weights are narrowed to 16 bits */
    weight0.i = ex->params[24];
    weight1.i = ex->params[25];
    weight2.i = ex->params[26];
    weight3.i = ex->params[27];
    rounding.i = 0x00000008;

    for (col = 0; col < width; col++) {
      orc_union16 sample;
      orc_union16 term;
      orc_union16 sum;

      /* convubw + mullw on the first source */
      sample.i = (orc_uint8)src0[col];
      term.i = (sample.i * weight0.i) & 0xffff;
      sum = term;

      /* convubw + mullw + addw on the second source */
      sample.i = (orc_uint8)src1[col];
      term.i = (sample.i * weight1.i) & 0xffff;
      sum.i = sum.i + term.i;

      /* convubw + mullw + addw on the third source */
      sample.i = (orc_uint8)src2[col];
      term.i = (sample.i * weight2.i) & 0xffff;
      sum.i = sum.i + term.i;

      /* convubw + mullw + addw on the fourth source */
      sample.i = (orc_uint8)src3[col];
      term.i = (sample.i * weight3.i) & 0xffff;
      sum.i = sum.i + term.i;

      /* addw: rounding constant */
      sum.i = sum.i + rounding.i;
      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum.i);
    }
  }
}

/* orc_combine4_12xn_u8
 *
 * Backup C implementation over a 2-D block that is 12 bytes wide and
 * ex->params[ORC_VAR_A1] rows high.  Row pointers come from
 * ex->arrays[0] (destination) and ex->arrays[4..7] (sources) via
 * ORC_PTR_OFFSET with the per-row offsets ex->params[0] and
 * ex->params[4..7].
 *
 * Each output byte is
 *   ORC_CLAMP_UB(s4 * p24 + s5 * p25 + s6 * p26 + s7 * p27 + 8)
 * where sN is the unsigned source byte from ex->arrays[N], pN is
 * ex->params[N] narrowed to 16 bits, and every product and partial sum
 * is narrowed to 16 bits.
 *
 * NOTE(review): a rounding constant of 8 is added but no right shift
 * follows before the clamp -- confirm against the .orc source.
 */
static void
_backup_orc_combine4_12xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 12;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int8 * ORC_RESTRICT dst;
  const orc_int8 * ORC_RESTRICT src0;
  const orc_int8 * ORC_RESTRICT src1;
  const orc_int8 * ORC_RESTRICT src2;
  const orc_int8 * ORC_RESTRICT src3;
  orc_union16 weight0;
  orc_union16 weight1;
  orc_union16 weight2;
  orc_union16 weight3;
  orc_union16 rounding;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    dst = ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    src0 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    src1 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    src2 = ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * row);
    src3 = ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * row);

    /* loadpw: weights are narrowed to 16 bits */
    weight0.i = ex->params[24];
    weight1.i = ex->params[25];
    weight2.i = ex->params[26];
    weight3.i = ex->params[27];
    rounding.i = 0x00000008;

    for (col = 0; col < width; col++) {
      orc_union16 sample;
      orc_union16 term;
      orc_union16 sum;

      /* convubw + mullw on the first source */
      sample.i = (orc_uint8)src0[col];
      term.i = (sample.i * weight0.i) & 0xffff;
      sum = term;

      /* convubw + mullw + addw on the second source */
      sample.i = (orc_uint8)src1[col];
      term.i = (sample.i * weight1.i) & 0xffff;
      sum.i = sum.i + term.i;

      /* convubw + mullw + addw on the third source */
      sample.i = (orc_uint8)src2[col];
      term.i = (sample.i * weight2.i) & 0xffff;
      sum.i = sum.i + term.i;

      /* convubw + mullw + addw on the fourth source */
      sample.i = (orc_uint8)src3[col];
      term.i = (sample.i * weight3.i) & 0xffff;
      sum.i = sum.i + term.i;

      /* addw: rounding constant */
      sum.i = sum.i + rounding.i;
      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum.i);
    }
  }
}

/* orc_combine4_16xn_u8
 *
 * C backup for the orc_combine4_16xn_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks 16 bytes of the four sources
 * (ex->arrays[4..7]), zero-extends every byte to 16 bits, multiplies it by
 * the matching weight (ex->params[24..27], narrowed to orc_int16), keeps the
 * low 16 bits of each product, sums the four products, adds the constant 8
 * and stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 * Every intermediate is narrowed back to orc_int16 after each step.
 *
 * NOTE(review): the +8 bias is clamped directly here, with no right shift
 * (orc_combine4_nxm_u8 shifts by 4 before clamping) -- confirm against
 * schroorc.orc that this is intended.
 */
static void
_backup_orc_combine4_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 16;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    const orc_int8 * ORC_RESTRICT src3 =
        ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * row);
    const orc_int8 * ORC_RESTRICT src4 =
        ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * row);
    /* Per-row loads of the four weights and the additive constant. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 weight3 = ex->params[26];
    const orc_int16 weight4 = ex->params[27];
    const orc_int16 bias = 0x00000008;

    for (col = 0; col < width; col++) {
      orc_int16 product;
      orc_int16 sum;

      /* convubw + mullw for source 1 */
      sum = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      /* convubw + mullw + addw for sources 2..4 */
      product = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src3[col] * weight3) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src4[col] * weight4) & 0xffff;
      sum = sum + product;
      /* addw with the constant */
      sum = sum + bias;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine4_24xn_u8
 *
 * C backup for the orc_combine4_24xn_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks 24 bytes of the four sources
 * (ex->arrays[4..7]), zero-extends every byte to 16 bits, multiplies it by
 * the matching weight (ex->params[24..27], narrowed to orc_int16), keeps the
 * low 16 bits of each product, sums the four products, adds the constant 8
 * and stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 * Every intermediate is narrowed back to orc_int16 after each step.
 *
 * NOTE(review): the +8 bias is clamped directly here, with no right shift
 * (orc_combine4_nxm_u8 shifts by 4 before clamping) -- confirm against
 * schroorc.orc that this is intended.
 */
static void
_backup_orc_combine4_24xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 24;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    const orc_int8 * ORC_RESTRICT src3 =
        ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * row);
    const orc_int8 * ORC_RESTRICT src4 =
        ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * row);
    /* Per-row loads of the four weights and the additive constant. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 weight3 = ex->params[26];
    const orc_int16 weight4 = ex->params[27];
    const orc_int16 bias = 0x00000008;

    for (col = 0; col < width; col++) {
      orc_int16 product;
      orc_int16 sum;

      /* convubw + mullw for source 1 */
      sum = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      /* convubw + mullw + addw for sources 2..4 */
      product = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src3[col] * weight3) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src4[col] * weight4) & 0xffff;
      sum = sum + product;
      /* addw with the constant */
      sum = sum + bias;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine4_32xn_u8
 *
 * C backup for the orc_combine4_32xn_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks 32 bytes of the four sources
 * (ex->arrays[4..7]), zero-extends every byte to 16 bits, multiplies it by
 * the matching weight (ex->params[24..27], narrowed to orc_int16), keeps the
 * low 16 bits of each product, sums the four products, adds the constant 8
 * and stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 * Every intermediate is narrowed back to orc_int16 after each step.
 *
 * NOTE(review): the +8 bias is clamped directly here, with no right shift
 * (orc_combine4_nxm_u8 shifts by 4 before clamping) -- confirm against
 * schroorc.orc that this is intended.
 */
static void
_backup_orc_combine4_32xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 32;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    const orc_int8 * ORC_RESTRICT src3 =
        ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * row);
    const orc_int8 * ORC_RESTRICT src4 =
        ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * row);
    /* Per-row loads of the four weights and the additive constant. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 weight3 = ex->params[26];
    const orc_int16 weight4 = ex->params[27];
    const orc_int16 bias = 0x00000008;

    for (col = 0; col < width; col++) {
      orc_int16 product;
      orc_int16 sum;

      /* convubw + mullw for source 1 */
      sum = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      /* convubw + mullw + addw for sources 2..4 */
      product = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src3[col] * weight3) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src4[col] * weight4) & 0xffff;
      sum = sum + product;
      /* addw with the constant */
      sum = sum + bias;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine4_nxm_u8
 *
 * C backup for the orc_combine4_nxm_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks ex->n bytes of the four sources
 * (ex->arrays[4..7]), zero-extends every byte to 16 bits, multiplies it by
 * the matching weight (ex->params[24..27], narrowed to orc_int16), keeps the
 * low 16 bits of each product, sums the four products, adds the constant 8,
 * shifts the sum right by 4 and stores the result clamped with ORC_CLAMP_UB
 * into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 * Every intermediate is narrowed back to orc_int16 after each step.
 */
static void
_backup_orc_combine4_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    const orc_int8 * ORC_RESTRICT src3 =
        ORC_PTR_OFFSET(ex->arrays[6], ex->params[6] * row);
    const orc_int8 * ORC_RESTRICT src4 =
        ORC_PTR_OFFSET(ex->arrays[7], ex->params[7] * row);
    /* Per-row loads of the four weights and the additive constant. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 weight3 = ex->params[26];
    const orc_int16 weight4 = ex->params[27];
    const orc_int16 bias = 0x00000008;

    for (col = 0; col < width; col++) {
      orc_int16 product;
      orc_int16 sum;

      /* convubw + mullw for source 1 */
      sum = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      /* convubw + mullw + addw for sources 2..4 */
      product = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src3[col] * weight3) & 0xffff;
      sum = sum + product;
      product = ((orc_int16)(orc_uint8)src4[col] * weight4) & 0xffff;
      sum = sum + product;
      /* addw with the constant, then shrsw by 4 */
      sum = sum + bias;
      sum = sum >> 4;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine2_8xn_u8
 *
 * C backup for the orc_combine2_8xn_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks 8 bytes of the two sources
 * (ex->arrays[4] and ex->arrays[5]), zero-extends every byte to 16 bits,
 * multiplies it by its weight (ex->params[24] / ex->params[25], narrowed to
 * orc_int16), keeps the low 16 bits of each product, adds the two products
 * and the offset ex->params[26], shifts the sum right by ex->params[27] and
 * stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 */
static void
_backup_orc_combine2_8xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    /* Per-row loads of the two weights, the offset and the shift count. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 offset = ex->params[26];
    const int shift = ex->params[27];

    for (col = 0; col < width; col++) {
      orc_int16 first;
      orc_int16 second;
      orc_int16 sum;

      /* convubw + mullw for both sources */
      first = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      second = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      /* addw, addw, shrsw -- narrowed to 16 bits after every step */
      sum = first + second;
      sum = sum + offset;
      sum = sum >> shift;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine2_12xn_u8
 *
 * C backup for the orc_combine2_12xn_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks 12 bytes of the two sources
 * (ex->arrays[4] and ex->arrays[5]), zero-extends every byte to 16 bits,
 * multiplies it by its weight (ex->params[24] / ex->params[25], narrowed to
 * orc_int16), keeps the low 16 bits of each product, adds the two products
 * and the offset ex->params[26], shifts the sum right by ex->params[27] and
 * stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 */
static void
_backup_orc_combine2_12xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 12;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    /* Per-row loads of the two weights, the offset and the shift count. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 offset = ex->params[26];
    const int shift = ex->params[27];

    for (col = 0; col < width; col++) {
      orc_int16 first;
      orc_int16 second;
      orc_int16 sum;

      /* convubw + mullw for both sources */
      first = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      second = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      /* addw, addw, shrsw -- narrowed to 16 bits after every step */
      sum = first + second;
      sum = sum + offset;
      sum = sum >> shift;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine2_16xn_u8
 *
 * C backup for the orc_combine2_16xn_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks 16 bytes of the two sources
 * (ex->arrays[4] and ex->arrays[5]), zero-extends every byte to 16 bits,
 * multiplies it by its weight (ex->params[24] / ex->params[25], narrowed to
 * orc_int16), keeps the low 16 bits of each product, adds the two products
 * and the offset ex->params[26], shifts the sum right by ex->params[27] and
 * stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 */
static void
_backup_orc_combine2_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 16;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    /* Per-row loads of the two weights, the offset and the shift count. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 offset = ex->params[26];
    const int shift = ex->params[27];

    for (col = 0; col < width; col++) {
      orc_int16 first;
      orc_int16 second;
      orc_int16 sum;

      /* convubw + mullw for both sources */
      first = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      second = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      /* addw, addw, shrsw -- narrowed to 16 bits after every step */
      sum = first + second;
      sum = sum + offset;
      sum = sum >> shift;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_combine2_nxm_u8
 *
 * C backup for the orc_combine2_nxm_u8 program.  For each of the
 * ex->params[ORC_VAR_A1] rows it walks ex->n bytes of the two sources
 * (ex->arrays[4] and ex->arrays[5]), zero-extends every byte to 16 bits,
 * multiplies it by its weight (ex->params[24] / ex->params[25], narrowed to
 * orc_int16), keeps the low 16 bits of each product, adds the two products
 * and the offset ex->params[26], shifts the sum right by ex->params[27] and
 * stores the result clamped with ORC_CLAMP_UB into ex->arrays[0].
 * Each row pointer is the array base offset by ex->params[k] * row.
 */
static void
_backup_orc_combine2_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    orc_int8 * ORC_RESTRICT dst =
        ORC_PTR_OFFSET(ex->arrays[0], ex->params[0] * row);
    const orc_int8 * ORC_RESTRICT src1 =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT src2 =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);
    /* Per-row loads of the two weights, the offset and the shift count. */
    const orc_int16 weight1 = ex->params[24];
    const orc_int16 weight2 = ex->params[25];
    const orc_int16 offset = ex->params[26];
    const int shift = ex->params[27];

    for (col = 0; col < width; col++) {
      orc_int16 first;
      orc_int16 second;
      orc_int16 sum;

      /* convubw + mullw for both sources */
      first = ((orc_int16)(orc_uint8)src1[col] * weight1) & 0xffff;
      second = ((orc_int16)(orc_uint8)src2[col] * weight2) & 0xffff;
      /* addw, addw, shrsw -- narrowed to 16 bits after every step */
      sum = first + second;
      sum = sum + offset;
      sum = sum >> shift;

      /* convsuswb + storeb */
      dst[col] = ORC_CLAMP_UB(sum);
    }
  }
}

/* orc_sad_nxm_u8
 *
 * C backup for the orc_sad_nxm_u8 program: accumulates the sum of absolute
 * differences between the bytes of ex->arrays[4] and ex->arrays[5], both
 * read as unsigned, over ex->n columns and ex->params[ORC_VAR_A1] rows.
 * Each row pointer is the array base offset by ex->params[k] * row.
 * The 32-bit total is written to ex->accumulators[0].
 */
static void
_backup_orc_sad_nxm_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = ex->n;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int32 total = 0;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    const orc_int8 * ORC_RESTRICT lhs =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT rhs =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* accsadubl: widen both unsigned bytes before subtracting */
      const orc_int32 diff =
          (orc_int32)(orc_uint8)lhs[col] - (orc_int32)(orc_uint8)rhs[col];

      total = total + ORC_ABS(diff);
    }
  }

  ex->accumulators[0] = total;
}

/* orc_sad_8x8_u8
 *
 * C backup for the orc_sad_8x8_u8 program: accumulates the sum of absolute
 * differences between the bytes of ex->arrays[4] and ex->arrays[5], both
 * read as unsigned, over a fixed 8 columns by 8 rows.
 * Each row pointer is the array base offset by ex->params[k] * row.
 * The 32-bit total is written to ex->accumulators[0].
 */
static void
_backup_orc_sad_8x8_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 8;
  const int rows = 8;
  orc_int32 total = 0;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    const orc_int8 * ORC_RESTRICT lhs =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT rhs =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* accsadubl: widen both unsigned bytes before subtracting */
      const orc_int32 diff =
          (orc_int32)(orc_uint8)lhs[col] - (orc_int32)(orc_uint8)rhs[col];

      total = total + ORC_ABS(diff);
    }
  }

  ex->accumulators[0] = total;
}

/* orc_sad_12x12_u8
 *
 * C backup for the orc_sad_12x12_u8 program: accumulates the sum of absolute
 * differences between the bytes of ex->arrays[4] and ex->arrays[5], both
 * read as unsigned, over a fixed 12 columns by 12 rows.
 * Each row pointer is the array base offset by ex->params[k] * row.
 * The 32-bit total is written to ex->accumulators[0].
 */
static void
_backup_orc_sad_12x12_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 12;
  const int rows = 12;
  orc_int32 total = 0;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    const orc_int8 * ORC_RESTRICT lhs =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT rhs =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* accsadubl: widen both unsigned bytes before subtracting */
      const orc_int32 diff =
          (orc_int32)(orc_uint8)lhs[col] - (orc_int32)(orc_uint8)rhs[col];

      total = total + ORC_ABS(diff);
    }
  }

  ex->accumulators[0] = total;
}

/* orc_sad_16xn_u8
 *
 * C backup for the orc_sad_16xn_u8 program: accumulates the sum of absolute
 * differences between the bytes of ex->arrays[4] and ex->arrays[5], both
 * read as unsigned, over 16 columns and ex->params[ORC_VAR_A1] rows.
 * Each row pointer is the array base offset by ex->params[k] * row.
 * The 32-bit total is written to ex->accumulators[0].
 */
static void
_backup_orc_sad_16xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 16;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int32 total = 0;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    const orc_int8 * ORC_RESTRICT lhs =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT rhs =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* accsadubl: widen both unsigned bytes before subtracting */
      const orc_int32 diff =
          (orc_int32)(orc_uint8)lhs[col] - (orc_int32)(orc_uint8)rhs[col];

      total = total + ORC_ABS(diff);
    }
  }

  ex->accumulators[0] = total;
}

/* orc_sad_32xn_u8
 *
 * C backup for the orc_sad_32xn_u8 program: accumulates the sum of absolute
 * differences between the bytes of ex->arrays[4] and ex->arrays[5], both
 * read as unsigned, over 32 columns and ex->params[ORC_VAR_A1] rows.
 * Each row pointer is the array base offset by ex->params[k] * row.
 * The 32-bit total is written to ex->accumulators[0].
 */
static void
_backup_orc_sad_32xn_u8 (OrcExecutor * ORC_RESTRICT ex)
{
  const int width = 32;
  const int rows = ex->params[ORC_VAR_A1];
  orc_int32 total = 0;
  int row;
  int col;

  for (row = 0; row < rows; row++) {
    const orc_int8 * ORC_RESTRICT lhs =
        ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * row);
    const orc_int8 * ORC_RESTRICT rhs =
        ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * row);

    for (col = 0; col < width; col++) {
      /* accsadubl: widen both unsigned bytes before subtracting */
      const orc_int32 diff =
          (orc_int32)(orc_uint8)lhs[col] - (orc_int32)(orc_uint8)rhs[col];

      total = total + ORC_ABS(diff);
    }
  }

  ex->accumulators[0] = total;
}


/* Command-line switches set while main() parses argv; selecting one clears
 * the other, so at most one of them is non-zero. */
/* Non-zero after --quiet/-q: main() skips the per-program name, PASSED and
 * COMPILE FAILED messages. */
static int quiet = 0;
/* Non-zero after --benchmark/-b: main() additionally prints the cycle counts
 * returned by orc_test_performance_full(). */
static int benchmark = 0;

/* Print the command-line usage summary to stdout and terminate the process
 * with exit status 0.  argv0 is the program name shown in the usage line. */
static void help (const char *argv0)
{
  /* Fixed lines that follow the usage line, emitted in order. */
  static const char *const option_lines[] = {
    "Help Options:\n",
    "  -h, --help          Show help options\n",
    "Application Options:\n",
    "  -b, --benchmark     Run benchmark and show results\n",
    "  -q, --quiet         Don't output anything except on failures\n"
  };
  size_t line;

  fputs ("Usage:\n", stdout);
  printf ("  %s [OPTION]\n", argv0);

  for (line = 0; line < sizeof option_lines / sizeof option_lines[0]; line++) {
    fputs (option_lines[line], stdout);
  }

  exit (0);
}

int
main (int argc, char *argv[])
{
  int error = FALSE;
  int i;

  orc_test_init ();

  for(i=1;i<argc;i++) {
    if (strcmp(argv[i], "--help") == 0 ||
      strcmp(argv[i], "-h") == 0) {
      help(argv[0]);
    } else if (strcmp(argv[i], "--quiet") == 0 ||
      strcmp(argv[i], "-q") == 0) {
      quiet = 1;
      benchmark = 0;
    } else if (strcmp(argv[i], "--benchmark") == 0 ||
      strcmp(argv[i], "-b") == 0) {
      benchmark = 1;
      quiet = 0;
    }
  }

  /* orc_add2_rshift_add_s16_22_op */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_add2_rshift_add_s16_22_op:\n");
    p = orc_program_new ();
    orc_program_set_name (p, "orc_add2_rshift_add_s16_22_op");
    orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_22_op);
    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 2, "s2");
    orc_program_add_source (p, 2, "s3");
      orc_program_add_constant (p, 4, 0x00000002, "c1");
    orc_program_add_temporary (p, 2, "t1");

      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_add2_rshift_add_s16_22 */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_add2_rshift_add_s16_22:\n");
    p = orc_program_new ();
    orc_program_set_name (p, "orc_add2_rshift_add_s16_22");
    orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_22);
    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 2, "s2");
      orc_program_add_constant (p, 4, 0x00000002, "c1");
    orc_program_add_temporary (p, 2, "t1");

      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_add2_rshift_sub_s16_22_op */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_add2_rshift_sub_s16_22_op:\n");
    p = orc_program_new ();
    orc_program_set_name (p, "orc_add2_rshift_sub_s16_22_op");
    orc_program_set_backup_function (p, _backup_orc_add2_rshift_sub_s16_22_op);
    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 2, "s2");
    orc_program_add_source (p, 2, "s3");
      orc_program_add_constant (p, 4, 0x00000002, "c1");
    orc_program_add_temporary (p, 2, "t1");

      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_add2_rshift_sub_s16_22 */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_add2_rshift_sub_s16_22:\n");
    p = orc_program_new ();
    orc_program_set_name (p, "orc_add2_rshift_sub_s16_22");
    orc_program_set_backup_function (p, _backup_orc_add2_rshift_sub_s16_22);
    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 2, "s2");
      orc_program_add_constant (p, 4, 0x00000002, "c1");
    orc_program_add_temporary (p, 2, "t1");

      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_add2_rshift_add_s16_11_op */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_add2_rshift_add_s16_11_op:\n");
    p = orc_program_new ();
    orc_program_set_name (p, "orc_add2_rshift_add_s16_11_op");
    orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_11_op);
    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 2, "s2");
    orc_program_add_source (p, 2, "s3");
    orc_program_add_temporary (p, 2, "t1");

      orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_add2_rshift_add_s16_11 */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_add2_rshift_add_s16_11:\n");
    p = orc_program_new ();
    orc_program_set_name (p, "orc_add2_rshift_add_s16_11");
    orc_program_set_backup_function (p, _backup_orc_add2_rshift_add_s16_11);
    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 2, "s2");
    orc_program_add_temporary (p, 2, "t1");

      orc_program_append_2 (p, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /*
   * orc_add2_rshift_sub_s16_11_op
   *
   * Assembles the program (one 2-byte destination, three 2-byte sources,
   * one 2-byte temporary; "avgsw" followed by "subw"), runs the
   * backup-function and compiled-code comparisons on it, and prints the
   * cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_add2_rshift_sub_s16_11_op:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_add2_rshift_sub_s16_11_op");
    orc_program_set_backup_function (prog,
        _backup_orc_add2_rshift_sub_s16_11_op);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_source (prog, 2, "s3");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add2_rshift_sub_s16_11
   *
   * Assembles the program (one 2-byte destination, two 2-byte sources,
   * one 2-byte temporary; "avgsw" followed by "subw" on the destination),
   * runs the backup-function and compiled-code comparisons on it, and
   * prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_add2_rshift_sub_s16_11:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_add2_rshift_sub_s16_11");
    orc_program_set_backup_function (prog,
        _backup_orc_add2_rshift_sub_s16_11);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add_const_rshift_s16_11
   *
   * Assembles the program (one 2-byte destination, one 2-byte source,
   * one 4-byte constant with value 1, one 2-byte temporary; "addw"
   * followed by "shrsw", both using the same constant), runs the
   * backup-function and compiled-code comparisons on it, and prints the
   * cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_add_const_rshift_s16_11:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_add_const_rshift_s16_11");
    orc_program_set_backup_function (prog,
        _backup_orc_add_const_rshift_s16_11);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000001, "c1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add_const_rshift_s16
   *
   * Assembles the program (one 2-byte destination, two 2-byte
   * parameters, one 2-byte temporary; "addw" of the destination with p1
   * followed by "shrsw" with p2), runs the backup-function and
   * compiled-code comparisons on it, and prints the cycle count of each
   * stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_add_const_rshift_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_add_const_rshift_s16");
    orc_program_set_backup_function (prog, _backup_orc_add_const_rshift_s16);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add_s16
   *
   * Assembles the program (one 2-byte destination, two 2-byte sources;
   * a single "addw" instruction), runs the backup-function and
   * compiled-code comparisons on it, and prints the cycle count of each
   * stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_add_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_add_s16");
    orc_program_set_backup_function (prog, _backup_orc_add_s16);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add_s16_2d
   *
   * Assembles the program (marked 2D via orc_program_set_2d; one 2-byte
   * destination, one 2-byte source; a single "addw" into the
   * destination), runs the backup-function and compiled-code comparisons
   * on it, and prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_add_s16_2d:\n");
    }

    prog = orc_program_new ();
    /* The 2D flag is set before the name, as in the generated order. */
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_add_s16_2d");
    orc_program_set_backup_function (prog, _backup_orc_add_s16_2d);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_addc_rshift_s16
   *
   * Assembles the program (one 2-byte destination, two 2-byte sources,
   * one 2-byte parameter, one 2-byte temporary; "addw" followed by
   * "shrsw" with p1), runs the backup-function and compiled-code
   * comparisons on it, and prints the cycle count of each stage when
   * benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_addc_rshift_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_addc_rshift_s16");
    orc_program_set_backup_function (prog, _backup_orc_addc_rshift_s16);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_lshift1_s16
   *
   * Assembles the program (one 2-byte destination, one 2-byte source,
   * one 4-byte constant with value 1; a single "shlw" instruction), runs
   * the backup-function and compiled-code comparisons on it, and prints
   * the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_lshift1_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_lshift1_s16");
    orc_program_set_backup_function (prog, _backup_orc_lshift1_s16);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000001, "c1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_lshift2_s16
   *
   * Assembles the program (one 2-byte destination, one 2-byte source,
   * one 4-byte constant with value 2; a single "shlw" instruction), runs
   * the backup-function and compiled-code comparisons on it, and prints
   * the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_lshift2_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_lshift2_s16");
    orc_program_set_backup_function (prog, _backup_orc_lshift2_s16);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_lshift_s16_ip
   *
   * Assembles the program (one 2-byte destination, one 2-byte parameter;
   * a single "shlw" of the destination by p1), runs the backup-function
   * and compiled-code comparisons on it, and prints the cycle count of
   * each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_lshift_s16_ip:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_lshift_s16_ip");
    orc_program_set_backup_function (prog, _backup_orc_lshift_s16_ip);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_parameter (prog, 2, "p1");

    /* Instruction stream. */
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas2_add_s16_op
   *
   * Assembles the program (one 2-byte destination, three 2-byte sources,
   * parameters p1 (2 bytes), p2 and p3 (4 bytes each), a 2-byte and a
   * 4-byte temporary; six instructions ending in "addw" of s1 and t1),
   * runs the backup-function and compiled-code comparisons on it, and
   * prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas2_add_s16_op:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas2_add_s16_op");
    orc_program_set_backup_function (prog, _backup_orc_mas2_add_s16_op);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_source (prog, 2, "s3");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_parameter (prog, 4, "p3");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas2_add_s16_ip
   *
   * Assembles the program (one 2-byte destination, two 2-byte sources,
   * parameters p1 (2 bytes), p2 and p3 (4 bytes each), a 2-byte and a
   * 4-byte temporary; six instructions ending in "addw" of d1 and t1),
   * runs the backup-function and compiled-code comparisons on it, and
   * prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas2_add_s16_ip:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas2_add_s16_ip");
    orc_program_set_backup_function (prog, _backup_orc_mas2_add_s16_ip);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_parameter (prog, 4, "p3");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas2_sub_s16_op
   *
   * Assembles the program (one 2-byte destination, three 2-byte sources,
   * parameters p1 (2 bytes), p2 and p3 (4 bytes each), a 2-byte and a
   * 4-byte temporary; six instructions ending in "subw" of s1 and t1),
   * runs the backup-function and compiled-code comparisons on it, and
   * prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas2_sub_s16_op:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas2_sub_s16_op");
    orc_program_set_backup_function (prog, _backup_orc_mas2_sub_s16_op);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_source (prog, 2, "s3");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_parameter (prog, 4, "p3");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_S3, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas2_sub_s16_ip
   *
   * Assembles the program (one 2-byte destination, two 2-byte sources,
   * parameters p1 (2 bytes), p2 and p3 (4 bytes each), a 2-byte and a
   * 4-byte temporary; six instructions ending in "subw" of d1 and t1),
   * runs the backup-function and compiled-code comparisons on it, and
   * prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas2_sub_s16_ip:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas2_sub_s16_ip");
    orc_program_set_backup_function (prog, _backup_orc_mas2_sub_s16_ip);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_parameter (prog, 4, "p3");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas4_across_add_s16_1991_op
   *
   * Assembles the program (one 2-byte destination, five 2-byte sources,
   * one 4-byte constant with value 9, two 4-byte parameters, two 2-byte
   * and two 4-byte temporaries; nine instructions ending in "addw" of s1
   * and t1), runs the backup-function and compiled-code comparisons on
   * it, and prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas4_across_add_s16_1991_op:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas4_across_add_s16_1991_op");
    orc_program_set_backup_function (prog,
        _backup_orc_mas4_across_add_s16_1991_op);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_source (prog, 2, "s3");
    orc_program_add_source (prog, 2, "s4");
    orc_program_add_source (prog, 2, "s5");
    orc_program_add_constant (prog, 4, 0x00000009, "c1");
    orc_program_add_parameter (prog, 4, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");
    orc_program_add_temporary (prog, 4, "t4");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_S4, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_S5, ORC_VAR_D1);
    orc_program_append_2 (prog, "convswl", 0,
        ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas4_across_add_s16_1991_ip
   *
   * Assembles the program (one 2-byte destination, one 2-byte source,
   * four 4-byte constants with values 1, 2, 9 and 3, two 4-byte
   * parameters, two 2-byte and two 4-byte temporaries; thirteen
   * instructions that read s1 through "loadw"/"loadoffw" and end in
   * "addw" of d1 and t1), runs the backup-function and compiled-code
   * comparisons on it, and prints the cycle count of each stage when
   * benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas4_across_add_s16_1991_ip:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas4_across_add_s16_1991_ip");
    orc_program_set_backup_function (prog,
        _backup_orc_mas4_across_add_s16_1991_ip);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000001, "c1");
    orc_program_add_constant (prog, 4, 0x00000002, "c2");
    orc_program_add_constant (prog, 4, 0x00000009, "c3");
    orc_program_add_constant (prog, 4, 0x00000003, "c4");
    orc_program_add_parameter (prog, 4, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");
    orc_program_add_temporary (prog, 4, "t4");

    /* Instruction stream. */
    orc_program_append_2 (prog, "loadoffw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadoffw", 0,
        ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C3, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadoffw", 0,
        ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_C4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convswl", 0,
        ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas4_across_sub_s16_1991_op
   *
   * Assembles the program (one 2-byte destination, five 2-byte sources,
   * one 4-byte constant with value 9, two 4-byte parameters, two 2-byte
   * and two 4-byte temporaries; nine instructions ending in "subw" of s1
   * and t1), runs the backup-function and compiled-code comparisons on
   * it, and prints the cycle count of each stage when benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas4_across_sub_s16_1991_op:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas4_across_sub_s16_1991_op");
    orc_program_set_backup_function (prog,
        _backup_orc_mas4_across_sub_s16_1991_op);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_source (prog, 2, "s3");
    orc_program_add_source (prog, 2, "s4");
    orc_program_add_source (prog, 2, "s5");
    orc_program_add_constant (prog, 4, 0x00000009, "c1");
    orc_program_add_parameter (prog, 4, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");
    orc_program_add_temporary (prog, 4, "t4");

    /* Instruction stream. */
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_S4, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_S5, ORC_VAR_D1);
    orc_program_append_2 (prog, "convswl", 0,
        ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_mas4_across_sub_s16_1991_ip
   *
   * Assembles the program (one 2-byte destination, one 2-byte source,
   * four 4-byte constants with values 1, 2, 9 and 3, two 4-byte
   * parameters, two 2-byte and two 4-byte temporaries; thirteen
   * instructions that read s1 through "loadw"/"loadoffw" and end in
   * "subw" of d1 and t1), runs the backup-function and compiled-code
   * comparisons on it, and prints the cycle count of each stage when
   * benchmark is set.
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_mas4_across_sub_s16_1991_ip:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_mas4_across_sub_s16_1991_ip");
    orc_program_set_backup_function (prog,
        _backup_orc_mas4_across_sub_s16_1991_ip);

    /* Variable table; registration order is kept as generated. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000001, "c1");
    orc_program_add_constant (prog, 4, 0x00000002, "c2");
    orc_program_add_constant (prog, 4, 0x00000009, "c3");
    orc_program_add_constant (prog, 4, 0x00000003, "c4");
    orc_program_add_parameter (prog, 4, "p1");
    orc_program_add_parameter (prog, 4, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");
    orc_program_add_temporary (prog, 4, "t4");

    /* Instruction stream. */
    orc_program_append_2 (prog, "loadoffw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadoffw", 0,
        ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulswl", 0,
        ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_C3, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadoffw", 0,
        ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_C4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convswl", 0,
        ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsl", 0,
        ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0,
        ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result sets the error flag. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, an indeterminate result is
     * reported as COMPILE FAILED without setting the error flag. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_subtract_s16: assemble the program through the OrcProgram API, run
   * the backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_subtract_s16:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_subtract_s16");
    orc_program_set_backup_function (prog, _backup_orc_subtract_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add_s16_u8: assemble the program through the OrcProgram API, run the
   * backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_add_s16_u8:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_add_s16_u8");
    orc_program_set_backup_function (prog, _backup_orc_add_s16_u8);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_add_s16_u8_2d: assemble the program (flagged via
   * orc_program_set_2d) through the OrcProgram API, run the backup and
   * compiled output comparisons, and print cycle counts when benchmarking
   * is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_add_s16_u8_2d:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_add_s16_u8_2d");
    orc_program_set_backup_function (prog, _backup_orc_add_s16_u8_2d);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_convert_s16_u8: assemble the program through the OrcProgram API, run
   * the backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_convert_s16_u8:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_convert_s16_u8");
    orc_program_set_backup_function (prog, _backup_orc_convert_s16_u8);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 1, "s1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_convert_u8_s16: assemble the program through the OrcProgram API, run
   * the backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_convert_u8_s16:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_convert_u8_s16");
    orc_program_set_backup_function (prog, _backup_orc_convert_u8_s16);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 2, "s1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_offsetconvert_u8_s16: assemble the program through the OrcProgram
   * API, run the backup and compiled output comparisons, and print cycle
   * counts when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_offsetconvert_u8_s16:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_offsetconvert_u8_s16");
    orc_program_set_backup_function (prog, _backup_orc_offsetconvert_u8_s16);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000080, "c1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_offsetconvert_s16_u8: assemble the program through the OrcProgram
   * API, run the backup and compiled output comparisons, and print cycle
   * counts when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_offsetconvert_s16_u8:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_offsetconvert_s16_u8");
    orc_program_set_backup_function (prog, _backup_orc_offsetconvert_s16_u8);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_constant (prog, 4, 0x00000080, "c1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_subtract_s16_u8: assemble the program through the OrcProgram API,
   * run the backup and compiled output comparisons, and print cycle counts
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_subtract_s16_u8:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_subtract_s16_u8");
    orc_program_set_backup_function (prog, _backup_orc_subtract_s16_u8);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_multiply_and_add_s16_u8: assemble the program through the OrcProgram
   * API, run the backup and compiled output comparisons, and print cycle
   * counts when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_multiply_and_add_s16_u8:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_multiply_and_add_s16_u8");
    orc_program_set_backup_function (prog, _backup_orc_multiply_and_add_s16_u8);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_splat_s16_ns: assemble the program through the OrcProgram API, run
   * the backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_splat_s16_ns:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_splat_s16_ns");
    orc_program_set_backup_function (prog, _backup_orc_splat_s16_ns);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_parameter (prog, 2, "p1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_splat_s16_2d_4xn: assemble the program (orc_program_set_constant_n
   * with 4, plus orc_program_set_2d) through the OrcProgram API, run the
   * backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_splat_s16_2d_4xn:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 4);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_splat_s16_2d_4xn");
    orc_program_set_backup_function (prog, _backup_orc_splat_s16_2d_4xn);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_parameter (prog, 2, "p1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_splat_s16_2d_8xn: assemble the program (orc_program_set_constant_n
   * with 8, plus orc_program_set_2d) through the OrcProgram API, run the
   * backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_splat_s16_2d_8xn:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 8);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_splat_s16_2d_8xn");
    orc_program_set_backup_function (prog, _backup_orc_splat_s16_2d_8xn);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_parameter (prog, 2, "p1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_splat_s16_2d: assemble the program (flagged via orc_program_set_2d)
   * through the OrcProgram API, run the backup and compiled output
   * comparisons, and print cycle counts when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_splat_s16_2d:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_splat_s16_2d");
    orc_program_set_backup_function (prog, _backup_orc_splat_s16_2d);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_parameter (prog, 2, "p1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_splat_u8_ns: assemble the program through the OrcProgram API, run
   * the backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_splat_u8_ns:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_splat_u8_ns");
    orc_program_set_backup_function (prog, _backup_orc_splat_u8_ns);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_parameter (prog, 1, "p1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "copyb", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_splat_u8_2d: assemble the program (flagged via orc_program_set_2d)
   * through the OrcProgram API, run the backup and compiled output
   * comparisons, and print cycle counts when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_splat_u8_2d:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_splat_u8_2d");
    orc_program_set_backup_function (prog, _backup_orc_splat_u8_2d);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_parameter (prog, 1, "p1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "copyb", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_average_u8: assemble the program through the OrcProgram API, run the
   * backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_average_u8:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_average_u8");
    orc_program_set_backup_function (prog, _backup_orc_average_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_rrshift6_add_s16_2d: assemble the program (flagged via
   * orc_program_set_2d) through the OrcProgram API, run the backup and
   * compiled output comparisons, and print cycle counts when benchmarking
   * is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_rrshift6_add_s16_2d:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_rrshift6_add_s16_2d");
    orc_program_set_backup_function (prog, _backup_orc_rrshift6_add_s16_2d);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0x00000020, "c1");
    orc_program_add_constant (prog, 4, 0x00000006, "c2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_rrshift6_sub_s16_2d: assemble the program (flagged via
   * orc_program_set_2d, with two destinations) through the OrcProgram API,
   * run the backup and compiled output comparisons, and print cycle counts
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_rrshift6_sub_s16_2d:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_rrshift6_sub_s16_2d");
    orc_program_set_backup_function (prog, _backup_orc_rrshift6_sub_s16_2d);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_constant (prog, 4, 0x00001fe0, "c1");
    orc_program_add_constant (prog, 4, 0x00000006, "c2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_rrshift6_s16_ip_2d: assemble the program (flagged via
   * orc_program_set_2d) through the OrcProgram API, run the backup and
   * compiled output comparisons, and print cycle counts when benchmarking
   * is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_rrshift6_s16_ip_2d:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_rrshift6_s16_ip_2d");
    orc_program_set_backup_function (prog, _backup_orc_rrshift6_s16_ip_2d);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_constant (prog, 4, 0x00001fe0, "c1");
    orc_program_add_constant (prog, 4, 0x00000006, "c2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_rrshift6_s16_ip: assemble the program through the OrcProgram API,
   * run the backup and compiled output comparisons, and print cycle counts
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_rrshift6_s16_ip:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_rrshift6_s16_ip");
    orc_program_set_backup_function (prog, _backup_orc_rrshift6_s16_ip);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_constant (prog, 4, 0x00001fe0, "c1");
    orc_program_add_constant (prog, 4, 0x00000006, "c2");
    orc_program_add_temporary (prog, 2, "t1");

    /* Append the instructions in program order. */
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_unpack_yuyv_y: assemble the program through the OrcProgram API, run
   * the backup and compiled output comparisons, and print cycle counts when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_unpack_yuyv_y:\n");
    }

    /* Create the program and declare its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_unpack_yuyv_y");
    orc_program_set_backup_function (prog, _backup_orc_unpack_yuyv_y);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 2, "s1");

    /* Single-instruction body. */
    orc_program_append_2 (prog, "select0wb", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);
      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* Backup comparison: a zero result marks the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);
      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /* Compiled comparison: when not quiet, ORC_TEST_INDETERMINATE is reported
     * as COMPILE FAILED without setting error; otherwise zero sets error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status) {
      if (!quiet) {
        printf ("    compiled function:   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);
      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /* orc_unpack_yuyv_u
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   select0lw t1, s1
   *   select1wb d1, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_unpack_yuyv_u:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_unpack_yuyv_u");
    orc_program_set_backup_function (prog, _backup_orc_unpack_yuyv_u);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_temporary (prog, 2, "t1");

    orc_program_append_2 (prog, "select0lw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1wb", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_unpack_yuyv_v
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   select1lw t1, s1
   *   select1wb d1, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_unpack_yuyv_v:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_unpack_yuyv_v");
    orc_program_set_backup_function (prog, _backup_orc_unpack_yuyv_v);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_temporary (prog, 2, "t1");

    orc_program_append_2 (prog, "select1lw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1wb", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_packyuyv
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyw     t5, s1
   *   select0wb t1, t5
   *   select1wb t2, t5
   *   mergebw   t3, t1, s2
   *   mergebw   t4, t2, s3
   *   mergewl   d1, t3, t4
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_packyuyv:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_packyuyv");
    orc_program_set_backup_function (prog, _backup_orc_packyuyv);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 4, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_source (prog, 1, "s3");
    orc_program_add_temporary (prog, 1, "t1");
    orc_program_add_temporary (prog, 1, "t2");
    orc_program_add_temporary (prog, 2, "t3");
    orc_program_add_temporary (prog, 2, "t4");
    orc_program_add_temporary (prog, 2, "t5");

    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_T5, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0wb", 0,
        ORC_VAR_T1, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1wb", 0,
        ORC_VAR_T2, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mergebw", 0,
        ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mergebw", 0,
        ORC_VAR_T4, ORC_VAR_T2, ORC_VAR_S3, ORC_VAR_D1);
    orc_program_append_2 (prog, "mergewl", 0,
        ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_unpack_uyvy_y
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   select1wb d1, s1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_unpack_uyvy_y:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_unpack_uyvy_y");
    orc_program_set_backup_function (prog, _backup_orc_unpack_uyvy_y);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 2, "s1");

    orc_program_append_2 (prog, "select1wb", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_unpack_uyvy_u
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   select0lw t1, s1
   *   select0wb d1, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_unpack_uyvy_u:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_unpack_uyvy_u");
    orc_program_set_backup_function (prog, _backup_orc_unpack_uyvy_u);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_temporary (prog, 2, "t1");

    orc_program_append_2 (prog, "select0lw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0wb", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_unpack_uyvy_v
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   select1lw t1, s1
   *   select0wb d1, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_unpack_uyvy_v:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_unpack_uyvy_v");
    orc_program_set_backup_function (prog, _backup_orc_unpack_uyvy_v);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_temporary (prog, 2, "t1");

    orc_program_append_2 (prog, "select1lw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0wb", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_interleave2_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slot is presumably
   * filler for an operand the opcode does not take):
   *   mergewl d1, s1, s2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_interleave2_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_interleave2_s16");
    orc_program_set_backup_function (prog, _backup_orc_interleave2_s16);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 4, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");

    orc_program_append_2 (prog, "mergewl", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_interleave2_rrshift1_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slot is presumably
   * filler for an operand the opcode does not take):
   *   addw    t1, s1, c1
   *   shrsw   t1, t1, c1
   *   addw    t2, s2, c1
   *   shrsw   t2, t2, c1
   *   mergewl d1, t1, t2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_interleave2_rrshift1_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_interleave2_rrshift1_s16");
    orc_program_set_backup_function (prog, _backup_orc_interleave2_rrshift1_s16);

    /* Variable declarations, kept in the order the generator emitted them.
     * The single constant c1 (value 1) is used by both addw and shrsw. */
    orc_program_add_destination (prog, 4, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 1, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0,
        ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mergewl", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_deinterleave2_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyl     t1, s1
   *   select0lw d1, t1
   *   select1lw d2, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_deinterleave2_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_deinterleave2_s16");
    orc_program_set_backup_function (prog, _backup_orc_deinterleave2_s16);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_temporary (prog, 4, "t1");

    orc_program_append_2 (prog, "copyl", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0lw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1lw", 0,
        ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_deinterleave2_lshift1_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyl     t1, s1
   *   select0lw t2, t1
   *   shlw      d1, t2, c1
   *   select1lw t3, t1
   *   shlw      d2, t3, c1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_deinterleave2_lshift1_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_deinterleave2_lshift1_s16");
    orc_program_set_backup_function (prog, _backup_orc_deinterleave2_lshift1_s16);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_constant (prog, 4, 1, "c1");
    orc_program_add_temporary (prog, 4, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 2, "t3");

    orc_program_append_2 (prog, "copyl", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0lw", 0,
        ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1lw", 0,
        ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_D2, ORC_VAR_T3, ORC_VAR_C1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_deint_lshift1_split_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyl     t3, s1
   *   select0lw t1, t3
   *   select1lw t2, t3
   *   shlw      t1, t1, c1
   *   shlw      t2, t2, c1
   *   subw      t2, t2, t1
   *   copyw     d2, t2
   *   avgsw     t2, t2, c2
   *   addw      d1, t1, t2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_deint_lshift1_split_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_deint_lshift1_split_s16");
    orc_program_set_backup_function (prog,
        _backup_orc_haar_deint_lshift1_split_s16);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_constant (prog, 4, 1, "c1");
    orc_program_add_constant (prog, 4, 0, "c2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");

    orc_program_append_2 (prog, "copyl", 0,
        ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0lw", 0,
        ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1lw", 0,
        ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_deint_split_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyl     t3, s1
   *   select0lw t1, t3
   *   select1lw t2, t3
   *   subw      t2, t2, t1
   *   copyw     d2, t2
   *   avgsw     t2, t2, c1
   *   addw      d1, t1, t2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_deint_split_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_deint_split_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_deint_split_s16);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_source (prog, 4, "s1");
    orc_program_add_constant (prog, 4, 0, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");

    orc_program_append_2 (prog, "copyl", 0,
        ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0lw", 0,
        ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1lw", 0,
        ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_split_s16_lo
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyw t1, s1
   *   subw  t2, s2, t1
   *   avgsw t2, t2, c1
   *   addw  d1, t1, t2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_split_s16_lo:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_split_s16_lo");
    orc_program_set_backup_function (prog, _backup_orc_haar_split_s16_lo);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_split_s16_hi
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slot is presumably
   * filler for an operand the opcode does not take):
   *   subw d1, s2, s1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_split_s16_hi:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_split_s16_hi");
    orc_program_set_backup_function (prog, _backup_orc_haar_split_s16_hi);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");

    /* Note the operand order: s2 is the first source, s1 the second. */
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_S2, ORC_VAR_S1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_split_s16_op
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyw t1, s1
   *   subw  t2, s2, t1
   *   copyw d2, t2
   *   avgsw t2, t2, c1
   *   addw  d1, t1, t2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_split_s16_op:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_split_s16_op");
    orc_program_set_backup_function (prog, _backup_orc_haar_split_s16_op);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_split_s16
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * This program declares no sources; the first two instructions read the
   * destinations d1 and d2 before they are written.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyw t1, d1
   *   copyw t2, d2
   *   subw  t2, t2, t1
   *   copyw d2, t2
   *   avgsw t2, t2, c1
   *   addw  d1, t1, t2
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_split_s16:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_split_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_split_s16);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_constant (prog, 4, 0, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_T2, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_synth_s16_lo
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slot is presumably
   * filler for an operand the opcode does not take):
   *   avgsw t1, s2, c1
   *   subw  d1, s1, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_synth_s16_lo:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_synth_s16_lo");
    orc_program_set_backup_function (prog, _backup_orc_haar_synth_s16_lo);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0, "c1");
    orc_program_add_temporary (prog, 2, "t1");

    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_synth_s16_hi
   *
   * Builds the OrcProgram through the runtime API, then runs the
   * backup-function and compiled-code comparisons, setting `error` on a zero
   * result.  Cycle counts are printed only when `benchmark` is set.
   *
   * Instruction sequence (ORC_VAR_D1 in the trailing slots is presumably
   * filler for operands the opcode does not take):
   *   copyw t2, s2
   *   avgsw t3, t2, c1
   *   subw  t1, s1, t3
   *   addw  d1, t2, t1
   */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_haar_synth_s16_hi:\n");
    }

    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_synth_s16_hi");
    orc_program_set_backup_function (prog, _backup_orc_haar_synth_s16_hi);

    /* Variable declarations, kept in the order the generator emitted them. */
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 2, "t3");

    orc_program_append_2 (prog, "copyw", 0,
        ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0,
        ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0,
        ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_T3, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0,
        ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* A zero result from the backup comparison flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* When not quiet, an indeterminate result is reported as COMPILE FAILED
     * without setting `error`. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_synth_s16_op
   *
   * Opcode sequence built below:
   *   t2 = copyw (s2)
   *   t3 = avgsw (t2, c1)      with c1 = 0
   *   t1 = subw (s1, t3)
   *   d1 = copyw (t1)
   *   d2 = addw (t2, t1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_synth_s16_op:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_synth_s16_op");
    orc_program_set_backup_function (prog, _backup_orc_haar_synth_s16_op);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0x00000000, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 2, "t3");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T3, ORC_VAR_T2,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_T3, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D2, ORC_VAR_T2,
        ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_synth_s16
   *
   * Both destinations are also read as inputs. Opcode sequence built below:
   *   t1 = copyw (d1)
   *   t2 = copyw (d2)
   *   t3 = avgsw (t2, c1)      with c1 = 0
   *   t1 = subw (t1, t3)
   *   d1 = copyw (t1)
   *   d2 = addw (t2, t1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_synth_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_synth_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_synth_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_constant (prog, 4, 0x00000000, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 2, "t3");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T2, ORC_VAR_D2,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T3, ORC_VAR_T2,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_T3, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D2, ORC_VAR_T2,
        ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_synth_rrshift1_int_s16
   *
   * The destination is 4 bytes wide, the sources 2 bytes. Opcode sequence
   * built below:
   *   t2 = copyw (s2)
   *   t1 = avgsw (t2, c1)      with c1 = 0
   *   t1 = subw (s1, t1)
   *   t2 = addw (t2, t1)
   *   t1 = avgsw (t1, c1)
   *   t2 = avgsw (t2, c1)
   *   d1 = mergewl (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_synth_rrshift1_int_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_synth_rrshift1_int_s16");
    orc_program_set_backup_function (prog,
        _backup_orc_haar_synth_rrshift1_int_s16);
    orc_program_add_destination (prog, 4, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0x00000000, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T1, ORC_VAR_T2,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T2, ORC_VAR_T2,
        ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T2, ORC_VAR_T2,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_synth_int_s16
   *
   * The destination is 4 bytes wide, the sources 2 bytes. Opcode sequence
   * built below:
   *   t2 = copyw (s2)
   *   t1 = avgsw (t2, c1)      with c1 = 0
   *   t1 = subw (s1, t1)
   *   t2 = addw (t2, t1)
   *   d1 = mergewl (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_synth_int_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_synth_int_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_synth_int_s16);
    orc_program_add_destination (prog, 4, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0x00000000, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T2, ORC_VAR_S2,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T1, ORC_VAR_T2,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T2, ORC_VAR_T2,
        ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mergewl", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_sub_s16
   *
   * Single-instruction program, updating the destination in place:
   *   d1 = subw (d1, s1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_sub_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_sub_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_sub_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");

    orc_program_append_2 (prog, "subw", 0, ORC_VAR_D1, ORC_VAR_D1,
        ORC_VAR_S1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_add_half_s16
   *
   * Opcode sequence built below, updating the destination in place:
   *   t1 = avgsw (s1, c1)      with c1 = 0
   *   d1 = addw (d1, t1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_add_half_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_add_half_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_add_half_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000000, "c1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D1, ORC_VAR_D1,
        ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_add_s16
   *
   * Single-instruction program, updating the destination in place:
   *   d1 = addw (d1, s1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_add_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_add_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_add_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");

    orc_program_append_2 (prog, "addw", 0, ORC_VAR_D1, ORC_VAR_D1,
        ORC_VAR_S1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_haar_sub_half_s16
   *
   * Opcode sequence built below, updating the destination in place:
   *   t1 = avgsw (s1, c1)      with c1 = 0
   *   d1 = subw (d1, t1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_haar_sub_half_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_haar_sub_half_s16");
    orc_program_set_backup_function (prog, _backup_orc_haar_sub_half_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000000, "c1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "avgsw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_D1, ORC_VAR_D1,
        ORC_VAR_T1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_sum_u8
   *
   * Has a 1-byte source and a 4-byte accumulator, and no destination.
   * Opcode sequence built below:
   *   t1 = convubw (s1)
   *   t2 = convuwl (t1)
   *   a1 = accl (t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_sum_u8:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_sum_u8");
    orc_program_set_backup_function (prog, _backup_orc_sum_u8);
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_accumulator (prog, 4, "a1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "accl", 0, ORC_VAR_A1, ORC_VAR_T2,
        ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_sum_s16
   *
   * Has a 2-byte source and a 4-byte accumulator, and no destination.
   * Opcode sequence built below:
   *   t1 = convswl (s1)
   *   a1 = accl (t1)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_sum_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_sum_s16");
    orc_program_set_backup_function (prog, _backup_orc_sum_s16);
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_accumulator (prog, 4, "a1");
    orc_program_add_temporary (prog, 4, "t1");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "convswl", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "accl", 0, ORC_VAR_A1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_sum_square_diff_u8
   *
   * Has two 1-byte sources and a 4-byte accumulator, and no destination.
   * Opcode sequence built below:
   *   t1 = convubw (s1)
   *   t2 = convubw (s2)
   *   t1 = subw (t1, t2)
   *   t1 = mullw (t1, t1)
   *   t3 = convuwl (t1)
   *   a1 = accl (t3)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_sum_square_diff_u8:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_sum_square_diff_u8");
    orc_program_set_backup_function (prog, _backup_orc_sum_square_diff_u8);
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_accumulator (prog, 4, "a1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convuwl", 0, ORC_VAR_T3, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "accl", 0, ORC_VAR_A1, ORC_VAR_T3,
        ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_dequantise_s16_2d_4xn
   *
   * Marked 2-D with a constant n of 4. Opcode sequence built below:
   *   t1 = copyw (s1)
   *   t2 = signw (t1)
   *   t1 = absw (t1)
   *   t1 = mullw (t1, p1)
   *   t1 = addw (t1, p2)
   *   t1 = shrsw (t1, c1)      with c1 = 2
   *   d1 = mullw (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_dequantise_s16_2d_4xn:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 4);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_dequantise_s16_2d_4xn");
    orc_program_set_backup_function (prog, _backup_orc_dequantise_s16_2d_4xn);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_dequantise_s16_2d_8xn
   *
   * Marked 2-D with a constant n of 8. Opcode sequence built below:
   *   t1 = copyw (s1)
   *   t2 = signw (t1)
   *   t1 = absw (t1)
   *   t1 = mullw (t1, p1)
   *   t1 = addw (t1, p2)
   *   t1 = shrsw (t1, c1)      with c1 = 2
   *   d1 = mullw (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_dequantise_s16_2d_8xn:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 8);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_dequantise_s16_2d_8xn");
    orc_program_set_backup_function (prog, _backup_orc_dequantise_s16_2d_8xn);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_dequantise_s16_ip_2d_8xn
   *
   * Marked 2-D with a constant n of 8; the destination is also the input.
   * Opcode sequence built below:
   *   t1 = copyw (d1)
   *   t2 = signw (t1)
   *   t1 = absw (t1)
   *   t1 = mullw (t1, p1)
   *   t1 = addw (t1, p2)
   *   t1 = shrsw (t1, c1)      with c1 = 2
   *   d1 = mullw (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_dequantise_s16_ip_2d_8xn:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 8);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_dequantise_s16_ip_2d_8xn");
    orc_program_set_backup_function (prog,
        _backup_orc_dequantise_s16_ip_2d_8xn);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_dequantise_s16_ip_2d
   *
   * Marked 2-D; the destination is also the input. Opcode sequence built
   * below:
   *   t1 = copyw (d1)
   *   t2 = signw (t1)
   *   t1 = absw (t1)
   *   t1 = mullw (t1, p1)
   *   t1 = addw (t1, p2)
   *   t1 = shrsw (t1, c1)      with c1 = 2
   *   d1 = mullw (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_dequantise_s16_ip_2d:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_dequantise_s16_ip_2d");
    orc_program_set_backup_function (prog, _backup_orc_dequantise_s16_ip_2d);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_dequantise_s16_ip
   *
   * The destination is also the input. Opcode sequence built below:
   *   t1 = copyw (d1)
   *   t2 = signw (t1)
   *   t1 = absw (t1)
   *   t1 = mullw (t1, p1)
   *   t1 = addw (t1, p2)
   *   t1 = shrsw (t1, c1)      with c1 = 2
   *   d1 = mullw (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_dequantise_s16_ip:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_dequantise_s16_ip");
    orc_program_set_backup_function (prog, _backup_orc_dequantise_s16_ip);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_dequantise_s16
   *
   * Opcode sequence built below:
   *   t1 = copyw (s1)
   *   t2 = signw (t1)
   *   t1 = absw (t1)
   *   t1 = mullw (t1, p1)
   *   t1 = addw (t1, p2)
   *   t1 = shrsw (t1, c1)      with c1 = 2
   *   d1 = mullw (t1, t2)
   */
  {
    OrcProgram *prog;
    int status;

    if (!quiet) {
      printf ("orc_dequantise_s16:\n");
    }

    /* Declare the program and its variables. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_dequantise_s16");
    orc_program_set_backup_function (prog, _backup_orc_dequantise_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, in execution order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1,
        ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1,
        ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result marks the run as failed. */
    if (orc_test_compare_output_backup (prog)) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled code check: an indeterminate result is only reported (when
     * not quiet); otherwise a zero result marks the run as failed. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!status) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /*
   * orc_dequantise_var_s16_ip: assemble the program by hand (destination
   * d1, sources s1-s2, constant c1, temporaries t1-t2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_dequantise_var_s16_ip:\n");
    }

    /* Program identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_dequantise_var_s16_ip");
    orc_program_set_backup_function (prog, _backup_orc_dequantise_var_s16_ip);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_source (prog, 2, "s2");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, appended in this order; the first one reads d1. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_quantise1_s16: assemble the program by hand (destination d1,
   * source s1, constant c1, parameters p1-p3, temporaries t1-t2), then
   * check its backup and compiled forms, printing cycle counts between
   * the steps when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_quantise1_s16:\n");
    }

    /* Program identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_quantise1_s16");
    orc_program_set_backup_function (prog, _backup_orc_quantise1_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_parameter (prog, 2, "p3");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, appended in this order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulhuw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_quantise2_s16: assemble the program by hand (destination d1,
   * source s1, constant c1, parameters p1-p2, temporaries t1-t2), then
   * check its backup and compiled forms, printing cycle counts between
   * the steps when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_quantise2_s16:\n");
    }

    /* Program identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_quantise2_s16");
    orc_program_set_backup_function (prog, _backup_orc_quantise2_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, appended in this order. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_quantdequant1_s16: assemble the program by hand (destinations
   * d1-d2, constant c1, parameters p1-p5, temporaries t1-t2), then check
   * its backup and compiled forms, printing cycle counts between the
   * steps when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_quantdequant1_s16:\n");
    }

    /* Program identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_quantdequant1_s16");
    orc_program_set_backup_function (prog, _backup_orc_quantdequant1_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_parameter (prog, 2, "p3");
    orc_program_add_parameter (prog, 2, "p4");
    orc_program_add_parameter (prog, 2, "p5");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, appended in this order; the first one reads d2. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mulhuw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P5, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_quantdequant3_s16: assemble the program by hand (destinations
   * d1-d2, constant c1, parameters p1-p6, temporaries t1-t3), then check
   * its backup and compiled forms, printing cycle counts between the
   * steps when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_quantdequant3_s16:\n");
    }

    /* Program identity and variable declarations (p6 and t3 use size 4). */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_quantdequant3_s16");
    orc_program_set_backup_function (prog, _backup_orc_quantdequant3_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_parameter (prog, 2, "p3");
    orc_program_add_parameter (prog, 2, "p4");
    orc_program_add_parameter (prog, 2, "p5");
    orc_program_add_parameter (prog, 4, "p6");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");

    /* Instructions, appended in this order; the first one reads d2. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "muluwl", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addl", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P6, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrul", 0, ORC_VAR_T3, ORC_VAR_T3, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "convlw", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P5, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_quantdequant2_s16: assemble the program by hand (destinations
   * d1-d2, constant c1, parameters p1-p4, temporaries t1-t2), then check
   * its backup and compiled forms, printing cycle counts between the
   * steps when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_quantdequant2_s16:\n");
    }

    /* Program identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_quantdequant2_s16");
    orc_program_set_backup_function (prog, _backup_orc_quantdequant2_s16);
    orc_program_add_destination (prog, 2, "d1");
    orc_program_add_destination (prog, 2, "d2");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_parameter (prog, 2, "p1");
    orc_program_add_parameter (prog, 2, "p2");
    orc_program_add_parameter (prog, 2, "p3");
    orc_program_add_parameter (prog, 2, "p4");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");

    /* Instructions, appended in this order; the first one reads d2. */
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_T1, ORC_VAR_D2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "shlw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
    orc_program_append_2 (prog, "shruw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "copyw", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "signw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
    orc_program_append_2 (prog, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_D2, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_downsample_vert_u8: assemble the program by hand (destination d1,
   * sources s1-s4, constants c1-c3, temporaries t1-t3), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_downsample_vert_u8:\n");
    }

    /* Program identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_downsample_vert_u8");
    orc_program_set_backup_function (prog, _backup_orc_downsample_vert_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_source (prog, 1, "s3");
    orc_program_add_source (prog, 1, "s4");
    orc_program_add_constant (prog, 4, 0x00000006, "c1");
    orc_program_add_constant (prog, 4, 0x0000001a, "c2");
    orc_program_add_constant (prog, 4, 0x00000020, "c3");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 2, "t3");

    /* Instructions, appended in this order. */
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T2, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T3, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C3, ORC_VAR_D1);
    orc_program_append_2 (prog, "shruw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convwb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_downsample_horiz_u8: assemble the program by hand (destination d1,
   * source s1, constants c1-c5, temporaries t1-t6), then check its backup
   * and compiled forms, printing cycle counts between the steps when
   * benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_downsample_horiz_u8:\n");
    }

    /* Program identity and variable declarations (t4 and t5 use size 1). */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_downsample_horiz_u8");
    orc_program_set_backup_function (prog, _backup_orc_downsample_horiz_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_constant (prog, 4, 0x00000006, "c2");
    orc_program_add_constant (prog, 4, 0x00000001, "c3");
    orc_program_add_constant (prog, 4, 0x0000001a, "c4");
    orc_program_add_constant (prog, 4, 0x00000020, "c5");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 2, "t3");
    orc_program_add_temporary (prog, 1, "t4");
    orc_program_add_temporary (prog, 1, "t5");
    orc_program_add_temporary (prog, 2, "t6");

    /* Instructions, appended in this order. */
    orc_program_append_2 (prog, "loadw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select1wb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadoffw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "select0wb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T2, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "loadoffw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_C3, ORC_VAR_D1);
    orc_program_append_2 (prog, "splitwb", 0, ORC_VAR_T4, ORC_VAR_T5, ORC_VAR_T3, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T1, ORC_VAR_T4, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convubw", 0, ORC_VAR_T2, ORC_VAR_T5, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
    orc_program_append_2 (prog, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C4, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T1, ORC_VAR_D1);
    orc_program_append_2 (prog, "addw", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_C5, ORC_VAR_D1);
    orc_program_append_2 (prog, "shruw", 0, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convwb", 0, ORC_VAR_D1, ORC_VAR_T6, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_stats_moment_s16: assemble the program by hand (source s1,
   * accumulator a1, constants c1-c2, temporaries t1-t2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_stats_moment_s16:\n");
    }

    /* Program identity and variable declarations; no destination is added. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_stats_moment_s16");
    orc_program_set_backup_function (prog, _backup_orc_stats_moment_s16);
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_accumulator (prog, 4, "a1");
    orc_program_add_constant (prog, 4, 0x00000002, "c1");
    orc_program_add_constant (prog, 4, 0x00000000, "c2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instructions, appended in this order; the last one targets a1. */
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "maxsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_stats_above_s16: assemble the program by hand (source s1,
   * accumulator a1, constants c1-c2, temporaries t1-t2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_stats_above_s16:\n");
    }

    /* Program identity and variable declarations; no destination is added. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_stats_above_s16");
    orc_program_set_backup_function (prog, _backup_orc_stats_above_s16);
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_accumulator (prog, 4, "a1");
    orc_program_add_constant (prog, 4, 0x00000001, "c1");
    orc_program_add_constant (prog, 4, 0x00000000, "c2");
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 4, "t2");

    /* Instructions, appended in this order; the last one targets a1. */
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "subw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "maxsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2, ORC_VAR_D1);
    orc_program_append_2 (prog, "minsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, ORC_VAR_D1);
    orc_program_append_2 (prog, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_accw: assemble the program by hand (source s1, accumulator a1,
   * temporary t1), then check its backup and compiled forms, printing
   * cycle counts between the steps when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_accw:\n");
    }

    /* Program identity and variable declarations; no destination is added. */
    prog = orc_program_new ();
    orc_program_set_name (prog, "orc_accw");
    orc_program_set_backup_function (prog, _backup_orc_accw);
    orc_program_add_source (prog, 2, "s1");
    orc_program_add_accumulator (prog, 2, "a1");
    orc_program_add_temporary (prog, 2, "t1");

    /* Instructions, appended in this order; the last one targets a1. */
    orc_program_append_2 (prog, "absw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
    orc_program_append_2 (prog, "accw", 0, ORC_VAR_A1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_avg2_8xn_u8: assemble the program by hand as a 2-D program with
   * constant n set to 8 (destination d1, sources s1-s2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_avg2_8xn_u8:\n");
    }

    /* Program shape, identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 8);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_avg2_8xn_u8");
    orc_program_set_backup_function (prog, _backup_orc_avg2_8xn_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");

    /* Single instruction: avgub with d1 as target and s1, s2 as inputs. */
    orc_program_append_2 (prog, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_avg2_12xn_u8: assemble the program by hand as a 2-D program with
   * constant n set to 12 (destination d1, sources s1-s2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_avg2_12xn_u8:\n");
    }

    /* Program shape, identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 12);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_avg2_12xn_u8");
    orc_program_set_backup_function (prog, _backup_orc_avg2_12xn_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");

    /* Single instruction: avgub with d1 as target and s1, s2 as inputs. */
    orc_program_append_2 (prog, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_avg2_16xn_u8: assemble the program by hand as a 2-D program with
   * constant n set to 16 (destination d1, sources s1-s2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_avg2_16xn_u8:\n");
    }

    /* Program shape, identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 16);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_avg2_16xn_u8");
    orc_program_set_backup_function (prog, _backup_orc_avg2_16xn_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");

    /* Single instruction: avgub with d1 as target and s1, s2 as inputs. */
    orc_program_append_2 (prog, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /*
   * orc_avg2_32xn_u8: assemble the program by hand as a 2-D program with
   * constant n set to 32 (destination d1, sources s1-s2), then check its
   * backup and compiled forms, printing cycle counts between the steps
   * when benchmarking is enabled.
   */
  {
    OrcProgram *prog;
    int outcome;

    if (!quiet) {
      printf ("orc_avg2_32xn_u8:\n");
    }

    /* Program shape, identity and variable declarations. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 32);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_avg2_32xn_u8");
    orc_program_set_backup_function (prog, _backup_orc_avg2_32xn_u8);
    orc_program_add_destination (prog, 1, "d1");
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");

    /* Single instruction: avgub with d1 as target and s1, s2 as inputs. */
    orc_program_append_2 (prog, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL);

      printf ("    cycles (emulate) :   %g\n", cycles);
    }

    /* A zero result from the backup comparison flags the run as failed. */
    outcome = orc_test_compare_output_backup (prog);
    if (outcome) {
      if (!quiet) {
        printf ("    backup function  :   PASSED\n");
      }
    } else {
      error = TRUE;
    }

    if (benchmark) {
      double cycles =
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL);

      printf ("    cycles (backup)  :   %g\n", cycles);
    }

    /*
     * ORC_TEST_INDETERMINATE is reported as COMPILE FAILED, but only when
     * not quiet; otherwise a zero result sets error and any other result
     * prints PASSED unless quiet.
     */
    outcome = orc_test_compare_output (prog);
    if (!quiet && outcome == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (outcome == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      double cycles = orc_test_performance_full (prog, 0, NULL);

      printf ("    cycles (compiled):   %g\n", cycles);
    }

    orc_program_free (prog);
  }

  /* orc_avg2_nxm_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_avg2_nxm_u8:\n");
    p = orc_program_new ();
      /* 2-D program; unlike the fixed-width variants no constant n is set. */
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_avg2_nxm_u8");
    orc_program_set_backup_function (p, _backup_orc_avg2_nxm_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");

      /* d1 = avgub (s1, s2). The trailing ORC_VAR_D1 fills an operand slot
       * this opcode does not use (presumably a placeholder -- confirm). */
      orc_program_append_2 (p, "avgub", 0, ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine4_8xn_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine4_8xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 8 (presumably the row width, going by
       * the "8xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 8);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine4_8xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine4_8xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_source (p, 1, "s4");
      /* c1 = 8, added to the weighted sum below. */
      orc_program_add_constant (p, 4, 0x00000008, "c1");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t2 = s1*p1 + s2*p2 + s3*p3 + s4*p4 + c1, each source widened with
       * convubw first; convsuswb then stores t2 into d1. Operand slots an
       * opcode does not use are filled with ORC_VAR_D1 (presumably a
       * placeholder -- confirm).
       * NOTE(review): orc_combine4_nxm_u8 applies shrsw by 4 between the +8
       * add and convsuswb; this variant has no such step. Confirm against
       * schroorc.orc and _backup_orc_combine4_8xn_u8. */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine4_12xn_u8: build the program by hand, then run the backup
   * and compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine4_12xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 12 (presumably the row width, going by
       * the "12xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 12);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine4_12xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine4_12xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_source (p, 1, "s4");
      /* c1 = 8, added to the weighted sum below. */
      orc_program_add_constant (p, 4, 0x00000008, "c1");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t2 = s1*p1 + s2*p2 + s3*p3 + s4*p4 + c1, each source widened with
       * convubw first; convsuswb then stores t2 into d1. Operand slots an
       * opcode does not use are filled with ORC_VAR_D1 (presumably a
       * placeholder -- confirm).
       * NOTE(review): orc_combine4_nxm_u8 applies shrsw by 4 between the +8
       * add and convsuswb; this variant has no such step. Confirm against
       * schroorc.orc and _backup_orc_combine4_12xn_u8. */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine4_16xn_u8: build the program by hand, then run the backup
   * and compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine4_16xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 16 (presumably the row width, going by
       * the "16xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 16);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine4_16xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine4_16xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_source (p, 1, "s4");
      /* c1 = 8, added to the weighted sum below. */
      orc_program_add_constant (p, 4, 0x00000008, "c1");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t2 = s1*p1 + s2*p2 + s3*p3 + s4*p4 + c1, each source widened with
       * convubw first; convsuswb then stores t2 into d1. Operand slots an
       * opcode does not use are filled with ORC_VAR_D1 (presumably a
       * placeholder -- confirm).
       * NOTE(review): orc_combine4_nxm_u8 applies shrsw by 4 between the +8
       * add and convsuswb; this variant has no such step. Confirm against
       * schroorc.orc and _backup_orc_combine4_16xn_u8. */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine4_24xn_u8: build the program by hand, then run the backup
   * and compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine4_24xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 24 (presumably the row width, going by
       * the "24xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 24);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine4_24xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine4_24xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_source (p, 1, "s4");
      /* c1 = 8, added to the weighted sum below. */
      orc_program_add_constant (p, 4, 0x00000008, "c1");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t2 = s1*p1 + s2*p2 + s3*p3 + s4*p4 + c1, each source widened with
       * convubw first; convsuswb then stores t2 into d1. Operand slots an
       * opcode does not use are filled with ORC_VAR_D1 (presumably a
       * placeholder -- confirm).
       * NOTE(review): orc_combine4_nxm_u8 applies shrsw by 4 between the +8
       * add and convsuswb; this variant has no such step. Confirm against
       * schroorc.orc and _backup_orc_combine4_24xn_u8. */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine4_32xn_u8: build the program by hand, then run the backup
   * and compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine4_32xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 32 (presumably the row width, going by
       * the "32xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 32);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine4_32xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine4_32xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_source (p, 1, "s4");
      /* c1 = 8, added to the weighted sum below. */
      orc_program_add_constant (p, 4, 0x00000008, "c1");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t2 = s1*p1 + s2*p2 + s3*p3 + s4*p4 + c1, each source widened with
       * convubw first; convsuswb then stores t2 into d1. Operand slots an
       * opcode does not use are filled with ORC_VAR_D1 (presumably a
       * placeholder -- confirm).
       * NOTE(review): orc_combine4_nxm_u8 applies shrsw by 4 between the +8
       * add and convsuswb; this variant has no such step. Confirm against
       * schroorc.orc and _backup_orc_combine4_32xn_u8. */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine4_nxm_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine4_nxm_u8:\n");
    p = orc_program_new ();
      /* 2-D program; unlike the fixed-width variants no constant n is set. */
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine4_nxm_u8");
    orc_program_set_backup_function (p, _backup_orc_combine4_nxm_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_source (p, 1, "s4");
      /* c1 = 8 is added before the shift; c2 = 4 is the shift amount. */
      orc_program_add_constant (p, 4, 0x00000008, "c1");
      orc_program_add_constant (p, 4, 0x00000004, "c2");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t2 = (s1*p1 + s2*p2 + s3*p3 + s4*p4 + c1) >> c2, each source
       * widened with convubw first; convsuswb then stores t2 into d1.
       * Operand slots an opcode does not use are filled with ORC_VAR_D1
       * (presumably a placeholder -- confirm). */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S3, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S4, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C2, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine2_8xn_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine2_8xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 8 (presumably the row width, going by
       * the "8xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 8);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine2_8xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine2_8xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t1 = (s1*p1 + s2*p2 + p3) >> p4, both sources widened with convubw
       * first; convsuswb then stores t1 into d1. Operand slots an opcode
       * does not use are filled with ORC_VAR_D1 (presumably a placeholder
       * -- confirm). */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine2_12xn_u8: build the program by hand, then run the backup
   * and compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine2_12xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 12 (presumably the row width, going by
       * the "12xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 12);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine2_12xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine2_12xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t1 = (s1*p1 + s2*p2 + p3) >> p4, both sources widened with convubw
       * first; convsuswb then stores t1 into d1. Operand slots an opcode
       * does not use are filled with ORC_VAR_D1 (presumably a placeholder
       * -- confirm). */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine2_16xn_u8: build the program by hand, then run the backup
   * and compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine2_16xn_u8:\n");
    p = orc_program_new ();
      /* 2-D program with n fixed at 16 (presumably the row width, going by
       * the "16xn" name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 16);
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine2_16xn_u8");
    orc_program_set_backup_function (p, _backup_orc_combine2_16xn_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t1 = (s1*p1 + s2*p2 + p3) >> p4, both sources widened with convubw
       * first; convsuswb then stores t1 into d1. Operand slots an opcode
       * does not use are filled with ORC_VAR_D1 (presumably a placeholder
       * -- confirm). */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_combine2_nxm_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_combine2_nxm_u8:\n");
    p = orc_program_new ();
      /* 2-D program; unlike the fixed-width variants no constant n is set. */
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_combine2_nxm_u8");
    orc_program_set_backup_function (p, _backup_orc_combine2_nxm_u8);
    /* Declaration order matters: it must line up with the ORC_VAR_* ids
     * used below. The second argument is presumably the element size in
     * bytes -- confirm against the Orc API. */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_parameter (p, 2, "p3");
    orc_program_add_parameter (p, 2, "p4");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

      /* t1 = (s1*p1 + s2*p2 + p3) >> p4, both sources widened with convubw
       * first; convsuswb then stores t1 into d1. Operand slots an opcode
       * does not use are filled with ORC_VAR_D1 (presumably a placeholder
       * -- confirm). */
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1, ORC_VAR_D1);
      orc_program_append_2 (p, "mullw", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2, ORC_VAR_D1);
      orc_program_append_2 (p, "addw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P3, ORC_VAR_D1);
      orc_program_append_2 (p, "shrsw", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P4, ORC_VAR_D1);
      orc_program_append_2 (p, "convsuswb", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_sad_nxm_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_sad_nxm_u8:\n");
    p = orc_program_new ();
      /* 2-D program; neither n nor m is fixed for this variant. */
      orc_program_set_2d (p);
    orc_program_set_name (p, "orc_sad_nxm_u8");
    orc_program_set_backup_function (p, _backup_orc_sad_nxm_u8);
    /* Two sources and one accumulator; no destination is declared. The
     * second argument is presumably the element size in bytes -- confirm
     * against the Orc API. */
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_accumulator (p, 4, "a1");

      /* a1 accumulates accsadubl (s1, s2). The trailing ORC_VAR_D1 fills an
       * operand slot this opcode does not use (presumably a placeholder,
       * since no destination exists here -- confirm). */
      orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_sad_8x8_u8: build the program by hand, then run the backup and
   * compiled comparisons; a zero result from either sets `error`. */
  {
    OrcProgram *p = NULL;
    int ret;

    if (!quiet)      printf ("orc_sad_8x8_u8:\n");
    p = orc_program_new ();
      /* 2-D program with both n and m fixed at 8 (presumably an 8x8 block,
       * going by the name -- confirm against schroorc.orc). */
      orc_program_set_constant_n (p, 8);
      orc_program_set_2d (p);
      orc_program_set_constant_m (p, 8);
    orc_program_set_name (p, "orc_sad_8x8_u8");
    orc_program_set_backup_function (p, _backup_orc_sad_8x8_u8);
    /* Two sources and one accumulator; no destination is declared. The
     * second argument is presumably the element size in bytes -- confirm
     * against the Orc API. */
    orc_program_add_source (p, 1, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_accumulator (p, 4, "a1");

      /* a1 accumulates accsadubl (s1, s2). The trailing ORC_VAR_D1 fills an
       * operand slot this opcode does not use (presumably a placeholder,
       * since no destination exists here -- confirm). */
      orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    /* Optional timing of the emulated program. */
    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Check the backup function; a zero result flags the run as failed. */
    ret = orc_test_compare_output_backup (p);
    if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    /* Optional timing of the backup function. */
    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (p, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Check the compiled code. ORC_TEST_INDETERMINATE is reported as a
     * compile failure and not counted as an error (with quiet set this
     * relies on ORC_TEST_INDETERMINATE being nonzero -- confirm). */
    ret = orc_test_compare_output (p);
    if (ret == ORC_TEST_INDETERMINATE && !quiet) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (!ret) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    /* Optional timing with flags 0, reported as the compiled figure. */
    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (p, 0, NULL));
    }

    orc_program_free (p);
  }

  /* orc_sad_12x12_u8: verify the backup and compiled implementations,
   * optionally timing each variant when benchmarking is enabled. */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_sad_12x12_u8:\n");
    }

    /* Assemble the program: constant n = 12, 2D, constant m = 12,
     * sources s1/s2 of size 1 and accumulator a1 of size 4. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 12);
    orc_program_set_2d (prog);
    orc_program_set_constant_m (prog, 12);
    orc_program_set_name (prog, "orc_sad_12x12_u8");
    orc_program_set_backup_function (prog, _backup_orc_sad_12x12_u8);
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_accumulator (prog, 4, "a1");

    orc_program_append_2 (prog, "accsadubl", 0,
        ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled function check: when output is enabled, an indeterminate
     * result is reported as COMPILE FAILED instead of flagging an error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_sad_16xn_u8: verify the backup and compiled implementations,
   * optionally timing each variant when benchmarking is enabled. */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_sad_16xn_u8:\n");
    }

    /* Assemble the program: constant n = 16, 2D, sources s1/s2 of
     * size 1 and accumulator a1 of size 4. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 16);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_sad_16xn_u8");
    orc_program_set_backup_function (prog, _backup_orc_sad_16xn_u8);
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_accumulator (prog, 4, "a1");

    orc_program_append_2 (prog, "accsadubl", 0,
        ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled function check: when output is enabled, an indeterminate
     * result is reported as COMPILE FAILED instead of flagging an error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }

  /* orc_sad_32xn_u8: verify the backup and compiled implementations,
   * optionally timing each variant when benchmarking is enabled. */
  {
    OrcProgram *prog = NULL;
    int status;

    if (!quiet) {
      printf ("orc_sad_32xn_u8:\n");
    }

    /* Assemble the program: constant n = 32, 2D, sources s1/s2 of
     * size 1 and accumulator a1 of size 4. */
    prog = orc_program_new ();
    orc_program_set_constant_n (prog, 32);
    orc_program_set_2d (prog);
    orc_program_set_name (prog, "orc_sad_32xn_u8");
    orc_program_set_backup_function (prog, _backup_orc_sad_32xn_u8);
    orc_program_add_source (prog, 1, "s1");
    orc_program_add_source (prog, 1, "s2");
    orc_program_add_accumulator (prog, 4, "a1");

    orc_program_append_2 (prog, "accsadubl", 0,
        ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);

    if (benchmark) {
      printf ("    cycles (emulate) :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_EMULATE, NULL));
    }

    /* Backup function check: a zero result flags the run as failed. */
    status = orc_test_compare_output_backup (prog);
    if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    backup function  :   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (backup)  :   %g\n",
          orc_test_performance_full (prog, ORC_TEST_FLAGS_BACKUP, NULL));
    }

    /* Compiled function check: when output is enabled, an indeterminate
     * result is reported as COMPILE FAILED instead of flagging an error. */
    status = orc_test_compare_output (prog);
    if (!quiet && status == ORC_TEST_INDETERMINATE) {
      printf ("    compiled function:   COMPILE FAILED\n");
    } else if (status == 0) {
      error = TRUE;
    } else if (!quiet) {
      printf ("    compiled function:   PASSED\n");
    }

    if (benchmark) {
      printf ("    cycles (compiled):   %g\n",
          orc_test_performance_full (prog, 0, NULL));
    }

    orc_program_free (prog);
  }


  if (error) {
    return 1;
  };
  return 0;
}
