
/* Copyright (C) 2003-2008, Free Software Foundation, Inc.
   Contributed by Andy Vaught

  This file is part of g95.

  G95 is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2, or (at your option)
  any later version.

  G95 is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with g95; see the file COPYING.  If not, write to
  the Free Software Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.

  In addition to the permissions in the GNU General Public License, the
  Free Software Foundation gives you unlimited permission to link the
  compiled version of this file into combinations with other programs,
  and to distribute those combinations without any restriction coming
  from the use of this file.  (The General Public License restrictions
  do apply in other respects; for example, they cover modification of
  the file, and distribution when not linked into a combined executable.)
*/

/* Implement the matmul() intrinsic */


#ifdef SUBROUTINE22

/* Vector-times-matrix product for one operand type pair.
 *
 * matrix_a is read as a rank-1 array (only info[0] is used) and matrix_b
 * as a rank-2 array.  Each extent is ubound - lbound + 1, clamped at zero.
 * When the extent of matrix_a differs from the first extent of matrix_b,
 * runtime_error() is called with the mismatch message.
 *
 * Returns the descriptor obtained from temp_array().  Element j of its
 * base storage, for j below the second extent of matrix_b, receives
 *
 *     r(j) = \sum_i matrix_a(i) * matrix_b(i,j)
 *
 * The element arithmetic is whatever INIT_SUM and ACCUMULATE_SUM expand
 * to at the point this template is included. */

g95_array_descriptor *SUBROUTINE12(g95_array_descriptor *matrix_a,
				   g95_array_descriptor *matrix_b) {
g95_array_descriptor *product;
G95_AINT len_a, rows_b, cols_b, step_a, step_b, col, left;
char *pa, *pb;
TYPE_SUM acc;

    len_a = matrix_a->info[0].ubound - matrix_a->info[0].lbound + 1;
    len_a = (len_a < 0) ? 0 : len_a;

    rows_b = matrix_b->info[0].ubound - matrix_b->info[0].lbound + 1;
    rows_b = (rows_b < 0) ? 0 : rows_b;

    cols_b = matrix_b->info[1].ubound - matrix_b->info[1].lbound + 1;
    cols_b = (cols_b < 0) ? 0 : cols_b;

    if (len_a != rows_b)
	runtime_error(matrix_mismatch);

    product = temp_array(1, sizeof(acc), cols_b);

    /* Byte strides between consecutive elements along the summed index. */
    step_a = matrix_a->info[0].mult;
    step_b = matrix_b->info[0].mult;

    for(col=0; col<cols_b; col++) {
	/* First element of the vector. */
	pa = matrix_a->offset +
	    matrix_a->info[0].mult * matrix_a->info[0].lbound;

	/* First element of column col of the matrix. */
	pb = matrix_b->offset +
	    matrix_b->info[0].mult * matrix_b->info[0].lbound +
	    matrix_b->info[1].mult*(col + matrix_b->info[1].lbound);

	INIT_SUM(acc);

	for(left=len_a; left>0; left--) {
	    ACCUMULATE_SUM(acc, pa, pb);

	    pa += step_a;
	    pb += step_b;
	}

	((TYPE_SUM *) product->base)[col] = acc;
    }

    return product;
}


/* Matrix-times-vector product for one operand type pair.
 *
 * matrix_a is read as a rank-2 array and matrix_b as a rank-1 array (only
 * info[0] is used).  Each extent is ubound - lbound + 1, clamped at zero.
 * When the second extent of matrix_a differs from the extent of matrix_b,
 * runtime_error() is called with the mismatch message.
 *
 * Returns the descriptor obtained from temp_array().  Element i of its
 * base storage, for i below the first extent of matrix_a, receives
 *
 *     r(i) = \sum_j matrix_a(i,j) * matrix_b(j)
 *
 * All result elements are zeroed first and then updated one column of
 * matrix_a at a time, so that matrix_a is traversed in ascending memory
 * order when it is contiguous. */

g95_array_descriptor *SUBROUTINE21(g95_array_descriptor *matrix_a,
				   g95_array_descriptor *matrix_b) {
g95_array_descriptor *product;
G95_AINT rows_a, cols_a, len_b, row, col, step_a, step_b;
char *pa, *pb;
TYPE_SUM *out;

    rows_a = matrix_a->info[0].ubound - matrix_a->info[0].lbound + 1;
    rows_a = (rows_a < 0) ? 0 : rows_a;

    cols_a = matrix_a->info[1].ubound - matrix_a->info[1].lbound + 1;
    cols_a = (cols_a < 0) ? 0 : cols_a;

    len_b = matrix_b->info[0].ubound - matrix_b->info[0].lbound + 1;
    len_b = (len_b < 0) ? 0 : len_b;

    if (cols_a != len_b)
	runtime_error(matrix_mismatch);

    product = temp_array(1, sizeof(TYPE_SUM), rows_a);
    out = (TYPE_SUM *) product->base;

    for(row=0; row<rows_a; row++)
	INIT_SUM(out[row]);

    /* Byte strides along the first dimension of each operand. */
    step_a = matrix_a->info[0].mult;
    step_b = matrix_b->info[0].mult;

    /* pb walks the vector; it advances once per column of matrix_a. */
    pb = matrix_b->offset + matrix_b->info[0].mult*matrix_b->info[0].lbound;

    for(col=0; col<cols_a; col++, pb += step_b) {
	/* First element of column col of matrix_a. */
	pa = matrix_a->offset + matrix_a->info[0].mult*matrix_a->info[0].lbound
	    + matrix_a->info[1].mult * (col+matrix_a->info[1].lbound);

	for(row=0; row<rows_a; row++, pa += step_a) {
	    ACCUMULATE_SUM(out[row], pa, pb);
	}
    }

    return product;
}


/* Matrix-times-matrix product for one operand type pair.
 *
 * Both operands are read as rank-2 arrays.  Each extent is
 * ubound - lbound + 1, clamped at zero.  When the second extent of
 * matrix_a differs from the first extent of matrix_b, runtime_error() is
 * called with the mismatch message.
 *
 * Returns the descriptor obtained from temp_array(), filled so that
 *
 *     r(i,j) = \sum_k matrix_a(i,k) * matrix_b(k,j)
 *
 * The result storage is zeroed as one run of (rows of A) * (columns of B)
 * consecutive TYPE_SUM elements starting at its base pointer.  Each
 * element matrix_b(k,j) is then loaded once and multiplied into column j
 * of the result against column k of matrix_a; the result pointer is
 * stepped with ++, i.e. one TYPE_SUM at a time. */

g95_array_descriptor *SUBROUTINE22(g95_array_descriptor *matrix_a,
				   g95_array_descriptor *matrix_b) {
G95_AINT rows_a, cols_a, rows_b, cols_b, row, col, inner, left, step_a;
g95_array_descriptor *product;
TYPE_SUM *cell;
TYPE_B factor;
char *pa;

    rows_a = matrix_a->info[0].ubound - matrix_a->info[0].lbound + 1;
    rows_a = (rows_a < 0) ? 0 : rows_a;

    cols_a = matrix_a->info[1].ubound - matrix_a->info[1].lbound + 1;
    cols_a = (cols_a < 0) ? 0 : cols_a;

    rows_b = matrix_b->info[0].ubound - matrix_b->info[0].lbound + 1;
    rows_b = (rows_b < 0) ? 0 : rows_b;

    cols_b = matrix_b->info[1].ubound - matrix_b->info[1].lbound + 1;
    cols_b = (cols_b < 0) ? 0 : cols_b;

    if (cols_a != rows_b)
	runtime_error(matrix_mismatch);

    product = temp_array(2, sizeof(TYPE_SUM), rows_a, cols_b);

    /* Clear every element of the result before accumulating into it. */
    cell = (TYPE_SUM *) product->base;

    for(left = rows_a * cols_b; left > 0; left--) {
	INIT_SUM22(cell);
	cell++;
    }

    /* Byte stride between consecutive rows of matrix_a. */
    step_a = matrix_a->info[0].mult;

    for(col=0; col<cols_b; col++) {
	for(inner=0; inner<rows_b; inner++) {
	    /* The single element matrix_b(inner, col). */
	    factor = *((TYPE_B *) (matrix_b->offset +
		     matrix_b->info[0].mult * (inner + matrix_b->info[0].lbound) +
		     matrix_b->info[1].mult * (col + matrix_b->info[1].lbound)));

	    /* Top of column inner of matrix_a. */
	    pa = matrix_a->offset
		+ matrix_a->info[0].mult * matrix_a->info[0].lbound
		+ matrix_a->info[1].mult * (inner + matrix_a->info[1].lbound);

	    /* Top of column col of the result. */
	    cell = (TYPE_SUM *) (product->offset +
				 product->info[0].mult*product->info[0].lbound +
				 product->info[1].mult*(product->info[1].lbound + col));

	    for(row=0; row<rows_a; row++) {
		ACCUMULATE_SUM22(cell, pa, factor);
		cell++;
		pa += step_a;
	    }
	}
    }

    return product;
}

/* Drop the per-instantiation names and types so that the driver section
 * can define the next set before its next #include "matmul.c".  The
 * INIT_SUM / ACCUMULATE_SUM kernels are deliberately left defined; the
 * driver replaces them itself when the operand category changes. */
#undef SUBROUTINE12
#undef SUBROUTINE21
#undef SUBROUTINE22
#undef TYPE_A
#undef TYPE_B
#undef TYPE_SUM

#else

#include "runtime.h"

/* Message handed to runtime_error() by each matmul routine when the
 * summed extents of its two operands are not equal. */
static char matrix_mismatch[] = "Matrix size mismatch in MATMUL()";

/* Element kernels for the integer and real instantiations.
 *
 *   INIT_SUM(x)                zero the accumulator lvalue x
 *   INIT_SUM22(x)              zero the accumulator that pointer x refers to
 *   ACCUMULATE_SUM(x, a, b)    x += *a * *b, with a and b reinterpreted as
 *                              TYPE_A* and TYPE_B*
 *   ACCUMULATE_SUM22(x, a, b)  *x += *a * b, with b already a value
 *
 * Every parameter and each whole expansion is parenthesized so that an
 * expression argument such as "p + n" is cast and dereferenced as a unit
 * rather than having the cast bind to "p" alone. */
#define INIT_SUM(x) ((x) = 0)
#define INIT_SUM22(x) (*(x) = 0)
#define ACCUMULATE_SUM(x, a, b) \
    ((x) += (*((TYPE_A *) (a))) * (*((TYPE_B *) (b))))
#define ACCUMULATE_SUM22(x, a, b) \
    (*(x) += (*((TYPE_A *) (a))) * (b))

/* Integer x integer instantiations.  Each stanza names the three entry
 * points, sets the operand element types TYPE_A and TYPE_B and the result
 * element type TYPE_SUM, then includes the template.  In every stanza of
 * this group TYPE_SUM is the wider of the two operand types. */
#define SUBROUTINE12 prefix(matmul12_i1i1)
#define SUBROUTINE21 prefix(matmul21_i1i1)
#define SUBROUTINE22 prefix(matmul22_i1i1)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT1

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i1i2)
#define SUBROUTINE21 prefix(matmul21_i1i2)
#define SUBROUTINE22 prefix(matmul22_i1i2)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT2

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2i1)
#define SUBROUTINE21 prefix(matmul21_i2i1)
#define SUBROUTINE22 prefix(matmul22_i2i1)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT2

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2i2)
#define SUBROUTINE21 prefix(matmul21_i2i2)
#define SUBROUTINE22 prefix(matmul22_i2i2)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT2

#include "matmul.c"


#define SUBROUTINE12 prefix(matmul12_i1i4)
#define SUBROUTINE21 prefix(matmul21_i1i4)
#define SUBROUTINE22 prefix(matmul22_i1i4)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2i4)
#define SUBROUTINE21 prefix(matmul21_i2i4)
#define SUBROUTINE22 prefix(matmul22_i2i4)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4i1)
#define SUBROUTINE21 prefix(matmul21_i4i1)
#define SUBROUTINE22 prefix(matmul22_i4i1)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4i2)
#define SUBROUTINE21 prefix(matmul21_i4i2)
#define SUBROUTINE22 prefix(matmul22_i4i2)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4i4)
#define SUBROUTINE21 prefix(matmul21_i4i4)
#define SUBROUTINE22 prefix(matmul22_i4i4)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i1i8)
#define SUBROUTINE21 prefix(matmul21_i1i8)
#define SUBROUTINE22 prefix(matmul22_i1i8)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2i8)
#define SUBROUTINE21 prefix(matmul21_i2i8)
#define SUBROUTINE22 prefix(matmul22_i2i8)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4i8)
#define SUBROUTINE21 prefix(matmul21_i4i8)
#define SUBROUTINE22 prefix(matmul22_i4i8)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8i1)
#define SUBROUTINE21 prefix(matmul21_i8i1)
#define SUBROUTINE22 prefix(matmul22_i8i1)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8i2)
#define SUBROUTINE21 prefix(matmul21_i8i2)
#define SUBROUTINE22 prefix(matmul22_i8i2)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8i4)
#define SUBROUTINE21 prefix(matmul21_i8i4)
#define SUBROUTINE22 prefix(matmul22_i8i4)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8i8)
#define SUBROUTINE21 prefix(matmul21_i8i8)
#define SUBROUTINE22 prefix(matmul22_i8i8)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

/* Instantiations with at least one real operand (float = r4, double = r8)
 * and the other operand integer or real.  TYPE_SUM is float when no
 * operand is double, and double otherwise. */
#define SUBROUTINE12 prefix(matmul12_r4i1)
#define SUBROUTINE21 prefix(matmul21_r4i1)
#define SUBROUTINE22 prefix(matmul22_r4i1)
#define TYPE_A float
#define TYPE_B G95_INT1
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4i2)
#define SUBROUTINE21 prefix(matmul21_r4i2)
#define SUBROUTINE22 prefix(matmul22_r4i2)
#define TYPE_A float
#define TYPE_B G95_INT2
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4i4)
#define SUBROUTINE21 prefix(matmul21_r4i4)
#define SUBROUTINE22 prefix(matmul22_r4i4)
#define TYPE_A float
#define TYPE_B G95_INT4
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4i8)
#define SUBROUTINE21 prefix(matmul21_r4i8)
#define SUBROUTINE22 prefix(matmul22_r4i8)
#define TYPE_A float
#define TYPE_B G95_INT8
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i1r4)
#define SUBROUTINE21 prefix(matmul21_i1r4)
#define SUBROUTINE22 prefix(matmul22_i1r4)
#define TYPE_A G95_INT1
#define TYPE_B float
#define TYPE_SUM float

#include "matmul.c"
#define SUBROUTINE12 prefix(matmul12_i2r4)
#define SUBROUTINE21 prefix(matmul21_i2r4)
#define SUBROUTINE22 prefix(matmul22_i2r4)
#define TYPE_A G95_INT2
#define TYPE_B float
#define TYPE_SUM float

#include "matmul.c"
#define SUBROUTINE12 prefix(matmul12_i4r4)
#define SUBROUTINE21 prefix(matmul21_i4r4)
#define SUBROUTINE22 prefix(matmul22_i4r4)
#define TYPE_A G95_INT4
#define TYPE_B float
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8r4)
#define SUBROUTINE21 prefix(matmul21_i8r4)
#define SUBROUTINE22 prefix(matmul22_i8r4)
#define TYPE_A G95_INT8
#define TYPE_B float
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4r4)
#define SUBROUTINE21 prefix(matmul21_r4r4)
#define SUBROUTINE22 prefix(matmul22_r4r4)
#define TYPE_A float
#define TYPE_B float
#define TYPE_SUM float

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i1r8)
#define SUBROUTINE21 prefix(matmul21_i1r8)
#define SUBROUTINE22 prefix(matmul22_i1r8)
#define TYPE_A G95_INT1
#define TYPE_B double
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2r8)
#define SUBROUTINE21 prefix(matmul21_i2r8)
#define SUBROUTINE22 prefix(matmul22_i2r8)
#define TYPE_A G95_INT2
#define TYPE_B double
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4r8)
#define SUBROUTINE21 prefix(matmul21_i4r8)
#define SUBROUTINE22 prefix(matmul22_i4r8)
#define TYPE_A G95_INT4
#define TYPE_B double
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8r8)
#define SUBROUTINE21 prefix(matmul21_i8r8)
#define SUBROUTINE22 prefix(matmul22_i8r8)
#define TYPE_A G95_INT8
#define TYPE_B double
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4r8)
#define SUBROUTINE21 prefix(matmul21_r4r8)
#define SUBROUTINE22 prefix(matmul22_r4r8)
#define TYPE_A float
#define TYPE_B double
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8i1)
#define SUBROUTINE21 prefix(matmul21_r8i1)
#define SUBROUTINE22 prefix(matmul22_r8i1)
#define TYPE_A double
#define TYPE_B G95_INT1
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8i2)
#define SUBROUTINE21 prefix(matmul21_r8i2)
#define SUBROUTINE22 prefix(matmul22_r8i2)
#define TYPE_A double
#define TYPE_B G95_INT2
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8i4)
#define SUBROUTINE21 prefix(matmul21_r8i4)
#define SUBROUTINE22 prefix(matmul22_r8i4)
#define TYPE_A double
#define TYPE_B G95_INT4
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8i8)
#define SUBROUTINE21 prefix(matmul21_r8i8)
#define SUBROUTINE22 prefix(matmul22_r8i8)
#define TYPE_A double
#define TYPE_B G95_INT8
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8r4)
#define SUBROUTINE21 prefix(matmul21_r8r4)
#define SUBROUTINE22 prefix(matmul22_r8r4)
#define TYPE_A double
#define TYPE_B float
#define TYPE_SUM double

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8r8)
#define SUBROUTINE21 prefix(matmul21_r8r8)
#define SUBROUTINE22 prefix(matmul22_r8r8)
#define TYPE_A double
#define TYPE_B double
#define TYPE_SUM double

#include "matmul.c"

/* Mixed real/complex: TYPE_A is a scalar integer or real type, while
 * TYPE_B and TYPE_SUM are structures with fields r and c (presumably the
 * real and imaginary parts).  Both fields of the TYPE_B operand are
 * scaled by the TYPE_A operand and added to the matching accumulator
 * field.
 *
 * The two-statement kernels are wrapped in do { } while (0) so that each
 * expands to exactly one statement, and every parameter is parenthesized
 * so that expression arguments are cast and dereferenced as a unit. */

#undef INIT_SUM
#define INIT_SUM(x) ((x).r = (x).c = 0)

#undef INIT_SUM22
#define INIT_SUM22(x) ((x)->r = (x)->c = 0)

#undef ACCUMULATE_SUM
#define ACCUMULATE_SUM(x, a, b) \
    do { \
	(x).r += (*((TYPE_A *) (a))) * ((TYPE_B *) (b))->r; \
	(x).c += (*((TYPE_A *) (a))) * ((TYPE_B *) (b))->c; \
    } while (0)

#undef ACCUMULATE_SUM22
#define ACCUMULATE_SUM22(x, a, b) \
    do { \
	(x)->r += (*((TYPE_A *) (a))) * (b).r; \
	(x)->c += (*((TYPE_A *) (a))) * (b).c; \
    } while (0)


/* Integer-or-real x complex instantiations.  TYPE_B is z4 or z8 and, in
 * every stanza of this group, TYPE_SUM is the same type as TYPE_B. */
#define SUBROUTINE12 prefix(matmul12_i1z4)
#define SUBROUTINE21 prefix(matmul21_i1z4)
#define SUBROUTINE22 prefix(matmul22_i1z4)
#define TYPE_A G95_INT1
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2z4)
#define SUBROUTINE21 prefix(matmul21_i2z4)
#define SUBROUTINE22 prefix(matmul22_i2z4)
#define TYPE_A G95_INT2
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4z4)
#define SUBROUTINE21 prefix(matmul21_i4z4)
#define SUBROUTINE22 prefix(matmul22_i4z4)
#define TYPE_A G95_INT4
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8z4)
#define SUBROUTINE21 prefix(matmul21_i8z4)
#define SUBROUTINE22 prefix(matmul22_i8z4)
#define TYPE_A G95_INT8
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4z4)
#define SUBROUTINE21 prefix(matmul21_r4z4)
#define SUBROUTINE22 prefix(matmul22_r4z4)
#define TYPE_A float
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

/* NOTE(review): TYPE_SUM is z4 although TYPE_A is double.  Fortran's
 * mixed real/complex rules would give the wider complex kind here --
 * confirm that the compiler side expects z4 elements from these entry
 * points. */
#define SUBROUTINE12 prefix(matmul12_r8z4)
#define SUBROUTINE21 prefix(matmul21_r8z4)
#define SUBROUTINE22 prefix(matmul22_r8z4)
#define TYPE_A double
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i1z8)
#define SUBROUTINE21 prefix(matmul21_i1z8)
#define SUBROUTINE22 prefix(matmul22_i1z8)
#define TYPE_A G95_INT1
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i2z8)
#define SUBROUTINE21 prefix(matmul21_i2z8)
#define SUBROUTINE22 prefix(matmul22_i2z8)
#define TYPE_A G95_INT2
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i4z8)
#define SUBROUTINE21 prefix(matmul21_i4z8)
#define SUBROUTINE22 prefix(matmul22_i4z8)
#define TYPE_A G95_INT4
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_i8z8)
#define SUBROUTINE21 prefix(matmul21_i8z8)
#define SUBROUTINE22 prefix(matmul22_i8z8)
#define TYPE_A G95_INT8
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r4z8)
#define SUBROUTINE21 prefix(matmul21_r4z8)
#define SUBROUTINE22 prefix(matmul22_r4z8)
#define TYPE_A float
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_r8z8)
#define SUBROUTINE21 prefix(matmul21_r8z8)
#define SUBROUTINE22 prefix(matmul22_r8z8)
#define TYPE_A double
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

/* Mixed complex/real: TYPE_A and TYPE_SUM are structures with fields r and
 * c, TYPE_B is a scalar integer or real type.  Both fields of the TYPE_A
 * operand are scaled by the TYPE_B operand.  INIT_SUM and INIT_SUM22 are
 * not redefined here; the definitions from the real/complex section stay
 * in effect.
 *
 * The two-statement kernels are wrapped in do { } while (0) so that each
 * expands to exactly one statement, and every parameter is parenthesized
 * so that expression arguments are cast and dereferenced as a unit. */

#undef ACCUMULATE_SUM
#define ACCUMULATE_SUM(x, a, b) \
    do { \
	(x).r += ((TYPE_A *) (a))->r * (*(TYPE_B *) (b)); \
	(x).c += ((TYPE_A *) (a))->c * (*(TYPE_B *) (b)); \
    } while (0)

#undef ACCUMULATE_SUM22
#define ACCUMULATE_SUM22(x, a, b) \
    do { \
	(x)->r += ((TYPE_A *) (a))->r * (b); \
	(x)->c += ((TYPE_A *) (a))->c * (b); \
    } while (0)

/* Complex x integer-or-real instantiations.  TYPE_A is z4 or z8 and, in
 * every stanza of this group, TYPE_SUM is the same type as TYPE_A. */
#define SUBROUTINE12 prefix(matmul12_z4i1)
#define SUBROUTINE21 prefix(matmul21_z4i1)
#define SUBROUTINE22 prefix(matmul22_z4i1)
#define TYPE_A z4
#define TYPE_B G95_INT1
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z4i2)
#define SUBROUTINE21 prefix(matmul21_z4i2)
#define SUBROUTINE22 prefix(matmul22_z4i2)
#define TYPE_A z4
#define TYPE_B G95_INT2
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z4i4)
#define SUBROUTINE21 prefix(matmul21_z4i4)
#define SUBROUTINE22 prefix(matmul22_z4i4)
#define TYPE_A z4
#define TYPE_B G95_INT4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z4i8)
#define SUBROUTINE21 prefix(matmul21_z4i8)
#define SUBROUTINE22 prefix(matmul22_z4i8)
#define TYPE_A z4
#define TYPE_B G95_INT8
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z4r4)
#define SUBROUTINE21 prefix(matmul21_z4r4)
#define SUBROUTINE22 prefix(matmul22_z4r4)
#define TYPE_A z4
#define TYPE_B float
#define TYPE_SUM z4

#include "matmul.c"

/* NOTE(review): TYPE_SUM is z4 although TYPE_B is double.  Fortran's
 * mixed real/complex rules would give the wider complex kind here --
 * confirm that the compiler side expects z4 elements from these entry
 * points. */
#define SUBROUTINE12 prefix(matmul12_z4r8)
#define SUBROUTINE21 prefix(matmul21_z4r8)
#define SUBROUTINE22 prefix(matmul22_z4r8)
#define TYPE_A z4
#define TYPE_B double
#define TYPE_SUM z4

#include "matmul.c"
#define SUBROUTINE12 prefix(matmul12_z8i1)
#define SUBROUTINE21 prefix(matmul21_z8i1)
#define SUBROUTINE22 prefix(matmul22_z8i1)
#define TYPE_A z8
#define TYPE_B G95_INT1
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z8i2)
#define SUBROUTINE21 prefix(matmul21_z8i2)
#define SUBROUTINE22 prefix(matmul22_z8i2)
#define TYPE_A z8
#define TYPE_B G95_INT2
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z8i4)
#define SUBROUTINE21 prefix(matmul21_z8i4)
#define SUBROUTINE22 prefix(matmul22_z8i4)
#define TYPE_A z8
#define TYPE_B G95_INT4
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z8i8)
#define SUBROUTINE21 prefix(matmul21_z8i8)
#define SUBROUTINE22 prefix(matmul22_z8i8)
#define TYPE_A z8
#define TYPE_B G95_INT8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z8r4)
#define SUBROUTINE21 prefix(matmul21_z8r4)
#define SUBROUTINE22 prefix(matmul22_z8r4)
#define TYPE_A z8
#define TYPE_B float
#define TYPE_SUM z8

#include "matmul.c"
#define SUBROUTINE12 prefix(matmul12_z8r8)
#define SUBROUTINE21 prefix(matmul21_z8r8)
#define SUBROUTINE22 prefix(matmul22_z8r8)
#define TYPE_A z8
#define TYPE_B double
#define TYPE_SUM z8

#include "matmul.c"

/* Mixed complex/complex: TYPE_A, TYPE_B and TYPE_SUM are all structures
 * with fields r and c.  The kernels add the complex product to the
 * accumulator:
 *
 *     x.r += a.r * b.r - a.c * b.c
 *     x.c += a.r * b.c + a.c * b.r
 *
 * The two-statement kernels are wrapped in do { } while (0) so that each
 * expands to exactly one statement, and every parameter is parenthesized
 * so that expression arguments are cast and dereferenced as a unit. */

#undef INIT_SUM
#define INIT_SUM(x) ((x).r = (x).c = 0)

#undef INIT_SUM22
#define INIT_SUM22(x) ((x)->r = (x)->c = 0)

#undef ACCUMULATE_SUM
#define ACCUMULATE_SUM(x, a, b) \
    do { \
	(x).r += ((TYPE_A *) (a))->r * ((TYPE_B *) (b))->r \
	    - ((TYPE_A *) (a))->c * ((TYPE_B *) (b))->c; \
	(x).c += ((TYPE_A *) (a))->r * ((TYPE_B *) (b))->c \
	    + ((TYPE_A *) (a))->c * ((TYPE_B *) (b))->r; \
    } while (0)

#undef ACCUMULATE_SUM22
#define ACCUMULATE_SUM22(x, a, b) \
    do { \
	(x)->r += ((TYPE_A *) (a))->r * (b).r - ((TYPE_A *) (a))->c * (b).c; \
	(x)->c += ((TYPE_A *) (a))->r * (b).c + ((TYPE_A *) (a))->c * (b).r; \
    } while (0)


/* Complex x complex instantiations.  TYPE_SUM is z4 only when both
 * operands are z4, and z8 otherwise. */
#define SUBROUTINE12 prefix(matmul12_z4z4)
#define SUBROUTINE21 prefix(matmul21_z4z4)
#define SUBROUTINE22 prefix(matmul22_z4z4)
#define TYPE_A z4
#define TYPE_B z4
#define TYPE_SUM z4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z4z8)
#define SUBROUTINE21 prefix(matmul21_z4z8)
#define SUBROUTINE22 prefix(matmul22_z4z8)
#define TYPE_A z4
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z8z4)
#define SUBROUTINE21 prefix(matmul21_z8z4)
#define SUBROUTINE22 prefix(matmul22_z8z4)
#define TYPE_A z8
#define TYPE_B z4
#define TYPE_SUM z8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_z8z8)
#define SUBROUTINE21 prefix(matmul21_z8z8)
#define SUBROUTINE22 prefix(matmul22_z8z8)
#define TYPE_A z8
#define TYPE_B z8
#define TYPE_SUM z8

#include "matmul.c"


/* Logicals: the accumulator starts at zero and is OR-ed with the logical
 * AND (&&) of the two operands, so after accumulation it is nonzero
 * exactly when some pair of operands was nonzero together.
 *
 * Every parameter and each whole expansion is parenthesized so that an
 * expression argument (a pointer sum, or a value such as "u || v") is
 * evaluated as a unit before the cast or the && is applied. */


#undef INIT_SUM
#define INIT_SUM(x) ((x) = 0)

#undef INIT_SUM22
#define INIT_SUM22(x) (*(x) = 0)

#undef ACCUMULATE_SUM
#define ACCUMULATE_SUM(x, a, b) \
    ((x) |= ((*((TYPE_A *) (a))) && (*((TYPE_B *) (b)))))

#undef ACCUMULATE_SUM22
#define ACCUMULATE_SUM22(x, a, b) \
    (*(x) |= ((*((TYPE_A *) (a))) && (b)))


/* Logical x logical instantiations.  The operands are stored in the
 * G95_INT types of the matching width and, in every stanza of this group,
 * TYPE_SUM is the wider of the two operand types. */
#define SUBROUTINE12 prefix(matmul12_l1l1)
#define SUBROUTINE21 prefix(matmul21_l1l1)
#define SUBROUTINE22 prefix(matmul22_l1l1)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT1

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l1l2)
#define SUBROUTINE21 prefix(matmul21_l1l2)
#define SUBROUTINE22 prefix(matmul22_l1l2)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT2

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l2l1)
#define SUBROUTINE21 prefix(matmul21_l2l1)
#define SUBROUTINE22 prefix(matmul22_l2l1)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT2

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l2l2)
#define SUBROUTINE21 prefix(matmul21_l2l2)
#define SUBROUTINE22 prefix(matmul22_l2l2)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT2

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l1l4)
#define SUBROUTINE21 prefix(matmul21_l1l4)
#define SUBROUTINE22 prefix(matmul22_l1l4)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l2l4)
#define SUBROUTINE21 prefix(matmul21_l2l4)
#define SUBROUTINE22 prefix(matmul22_l2l4)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l4l1)
#define SUBROUTINE21 prefix(matmul21_l4l1)
#define SUBROUTINE22 prefix(matmul22_l4l1)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l4l2)
#define SUBROUTINE21 prefix(matmul21_l4l2)
#define SUBROUTINE22 prefix(matmul22_l4l2)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l4l4)
#define SUBROUTINE21 prefix(matmul21_l4l4)
#define SUBROUTINE22 prefix(matmul22_l4l4)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT4

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l8l1)
#define SUBROUTINE21 prefix(matmul21_l8l1)
#define SUBROUTINE22 prefix(matmul22_l8l1)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT1
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l8l2)
#define SUBROUTINE21 prefix(matmul21_l8l2)
#define SUBROUTINE22 prefix(matmul22_l8l2)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT2
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l8l4)
#define SUBROUTINE21 prefix(matmul21_l8l4)
#define SUBROUTINE22 prefix(matmul22_l8l4)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT4
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l1l8)
#define SUBROUTINE21 prefix(matmul21_l1l8)
#define SUBROUTINE22 prefix(matmul22_l1l8)
#define TYPE_A G95_INT1
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l2l8)
#define SUBROUTINE21 prefix(matmul21_l2l8)
#define SUBROUTINE22 prefix(matmul22_l2l8)
#define TYPE_A G95_INT2
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l4l8)
#define SUBROUTINE21 prefix(matmul21_l4l8)
#define SUBROUTINE22 prefix(matmul22_l4l8)
#define TYPE_A G95_INT4
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#define SUBROUTINE12 prefix(matmul12_l8l8)
#define SUBROUTINE21 prefix(matmul21_l8l8)
#define SUBROUTINE22 prefix(matmul22_l8l8)
#define TYPE_A G95_INT8
#define TYPE_B G95_INT8
#define TYPE_SUM G95_INT8

#include "matmul.c"

#endif
