summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: cf5fdd8)
raw | patch | inline | side by side (parent: cf5fdd8)
author | Jianzhong Xu <a0869574@ti.com> | |
Thu, 12 Mar 2015 20:27:24 +0000 (16:27 -0400) | ||
committer | Jianzhong Xu <a0869574@ti.com> | |
Thu, 12 Mar 2015 20:27:24 +0000 (16:27 -0400) |
Makefile | patch | blob | history | |
build/tar_files_list.txt | patch | blob | history | |
examples/ludinv/Makefile | [new file with mode: 0644] | patch | blob |
examples/ludinv/dlaran.c | [new file with mode: 0644] | patch | blob |
examples/ludinv/dlarnd.c | [new file with mode: 0644] | patch | blob |
examples/ludinv/dlatm1.c | [new file with mode: 0644] | patch | blob |
examples/ludinv/dlatm2.c | [new file with mode: 0644] | patch | blob |
examples/ludinv/dlatm3.c | [new file with mode: 0644] | patch | blob |
examples/ludinv/dlatmr.c | [new file with mode: 0644] | patch | blob |
examples/ludinv/main.c | [new file with mode: 0644] | patch | blob |
examples/make.inc | patch | blob | history |
diff --git a/Makefile b/Makefile
index 583405280b5b53a4ffa656285a4cec0bddeb105c..8663e831823a80db3dcde5b81cd12115b9b5f8fb 100644 (file)
--- a/Makefile
+++ b/Makefile
ARMonly:
cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \
cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make -j8; make install; \
- cd ../$(LINALG_BLISACC_DIR)/src; make -f Makefile.ARM
+ cd ../$(LINALG_BLISACC_DIR)/src; make -f Makefile.ARM; \
+ cd ../../clapack; make f2clib; make cblaswrap; cd SRC; make
ARMplusDSP:
cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; make arch=C66 alllib; \
cp $(LINALG_CBLAS_DIR)/include/cblas.h ${DESTDIR}/usr/include
cp $(LINALG_BLISACC_DIR)/lib/libcblas_armplusdsp.a ${DESTDIR}/usr/lib
cp $(LINALG_BLIS_DIR)/install/arm/lib/libblis-$(BLIS_VERSION)-cortex-a15.a ${DESTDIR}/usr/lib/libblis.a
+ cp clapack/INCLUDE/*.h ${DESTDIR}/usr/include
cp -r ./examples/* ${DESTDIR}/usr/share/ti/examples/linalg
+ cp clapack/lapack_ARM.a ${DESTDIR}/usr/lib/liblapack.a
+ cp clapack/libcblaswr_ARM.a ${DESTDIR}/usr/lib/libcblaswr.a
+ cp clapack/F2CLIBS/libf2c_ARM.a ${DESTDIR}/usr/lib/libf2c.a
index 60dcd0ca2d9853c0012326fed07aa4c548f5da27..4eebae3824016db07cbfb14a9a908d78c5d3ad4d 100644 (file)
--- a/build/tar_files_list.txt
+++ b/build/tar_files_list.txt
make.inc
Makefile
debian
-examples
+examples/make.inc
+examples/Makefile
+examples/ludinv
+examples/matmpy
blis/version
blis/build
blis/CHANGELOG
cblas/README
cblas/README.TI
cblas/src
+clapack
\ No newline at end of file
diff --git a/examples/ludinv/Makefile b/examples/ludinv/Makefile
--- /dev/null
+++ b/examples/ludinv/Makefile
@@ -0,0 +1,22 @@
+EXE = ludinv
+
+include ../make.inc
+
+$(EXE): main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o
+ $(CC) $(CFLAGS) main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o $(LAPACKLIB) -o $@
+
+ARMtest: $(EXE)
+ @echo "Forcing execution on ARM."; \
+ export TI_CBLAS_OFFLOAD=000; \
+ ./$(EXE);
+
+DSPtest: $(EXE)
+ @echo "Forcing execution on DSP."; \
+ export TI_CBLAS_OFFLOAD=001; \
+ ./$(EXE);
+
+OPTtest: $(EXE)
+ @echo "Optimal execution on ARM or DSP."; \
+ export TI_CBLAS_OFFLOAD=002; \
+ ./$(EXE);
+
diff --git a/examples/ludinv/dlaran.c b/examples/ludinv/dlaran.c
--- /dev/null
+++ b/examples/ludinv/dlaran.c
@@ -0,0 +1,123 @@
+/* dlaran.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+doublereal dlaran_(integer *iseed)
+{
+ /* System generated locals */
+ doublereal ret_val;
+
+ /* Local variables */
+ integer it1, it2, it3, it4;
+ doublereal rndout;
+
+
+/* -- LAPACK auxiliary routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DLARAN returns a random real number from a uniform (0,1) */
+/* distribution. */
+
+/* Arguments */
+/* ========= */
+
+/* ISEED (input/output) INTEGER array, dimension (4) */
+/* On entry, the seed of the random number generator; the array */
+/* elements must be between 0 and 4095, and ISEED(4) must be */
+/* odd. */
+/* On exit, the seed is updated. */
+
+/* Further Details */
+/* =============== */
+
+/* This routine uses a multiplicative congruential method with modulus */
+/* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */
+/* 'Multiplicative congruential random number generators with modulus */
+/* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */
+/* b = 48', Math. Comp. 189, pp 331-344, 1990). */
+
+/* 48-bit integers are stored in 4 integer array elements with 12 bits */
+/* per element. Hence the routine is portable across machines with */
+/* integers of 32 bits or more. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Executable Statements .. */
+ /* Parameter adjustments */
+ --iseed;
+
+ /* Function Body */
+L10:
+
+/* multiply the seed by the multiplier modulo 2**48 */
+
+ it4 = iseed[4] * 2549;
+ it3 = it4 / 4096;
+ it4 -= it3 << 12;
+ it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508;
+ it2 = it3 / 4096;
+ it3 -= it2 << 12;
+ it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322;
+ it1 = it2 / 4096;
+ it2 -= it1 << 12;
+ it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4]
+ * 494;
+ it1 %= 4096;
+
+/* return updated seed */
+
+ iseed[1] = it1;
+ iseed[2] = it2;
+ iseed[3] = it3;
+ iseed[4] = it4;
+
+/* convert 48-bit integer to a real number in the interval (0,1) */
+
+ rndout = ((doublereal) it1 + ((doublereal) it2 + ((doublereal) it3 + (
+ doublereal) it4 * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4)
+ * 2.44140625e-4;
+
+ if (rndout == 1.) {
+/* If a real number has n bits of precision, and the first */
+/* n bits of the 48-bit integer above happen to be all 1 (which */
+/* will occur about once every 2**n calls), then DLARAN will */
+/* be rounded to exactly 1.0. */
+/* Since DLARAN is not supposed to return exactly 0.0 or 1.0 */
+/* (and some callers of DLARAN, such as CLARND, depend on that), */
+/* the statistically correct thing to do in this situation is */
+/* simply to iterate again. */
+/* N.B. the case DLARAN = 0.0 should not be possible. */
+
+ goto L10;
+ }
+
+ ret_val = rndout;
+ return ret_val;
+
+/* End of DLARAN */
+
+} /* dlaran_ */
diff --git a/examples/ludinv/dlarnd.c b/examples/ludinv/dlarnd.c
--- /dev/null
+++ b/examples/ludinv/dlarnd.c
@@ -0,0 +1,108 @@
+/* dlarnd.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+doublereal dlarnd_(integer *idist, integer *iseed)
+{
+ /* System generated locals */
+ doublereal ret_val;
+
+ /* Builtin functions */
+ double log(doublereal), sqrt(doublereal), cos(doublereal);
+
+ /* Local variables */
+ doublereal t1, t2;
+ extern doublereal dlaran_(integer *);
+
+
+/* -- LAPACK auxiliary routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DLARND returns a random real number from a uniform or normal */
+/* distribution. */
+
+/* Arguments */
+/* ========= */
+
+/* IDIST (input) INTEGER */
+/* Specifies the distribution of the random numbers: */
+/* = 1: uniform (0,1) */
+/* = 2: uniform (-1,1) */
+/* = 3: normal (0,1) */
+
+/* ISEED (input/output) INTEGER array, dimension (4) */
+/* On entry, the seed of the random number generator; the array */
+/* elements must be between 0 and 4095, and ISEED(4) must be */
+/* odd. */
+/* On exit, the seed is updated. */
+
+/* Further Details */
+/* =============== */
+
+/* This routine calls the auxiliary routine DLARAN to generate a random */
+/* real number from a uniform (0,1) distribution. The Box-Muller method */
+/* is used to transform numbers from a uniform to a normal distribution. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Executable Statements .. */
+
+/* Generate a real random number from a uniform (0,1) distribution */
+
+ /* Parameter adjustments */
+ --iseed;
+
+ /* Function Body */
+ t1 = dlaran_(&iseed[1]);
+
+ if (*idist == 1) {
+
+/* uniform (0,1) */
+
+ ret_val = t1;
+ } else if (*idist == 2) {
+
+/* uniform (-1,1) */
+
+ ret_val = t1 * 2. - 1.;
+ } else if (*idist == 3) {
+
+/* normal (0,1) */
+
+ t2 = dlaran_(&iseed[1]);
+ ret_val = sqrt(log(t1) * -2.) * cos(t2 *
+ 6.2831853071795864769252867663);
+ }
+ return ret_val;
+
+/* End of DLARND */
+
+} /* dlarnd_ */
diff --git a/examples/ludinv/dlatm1.c b/examples/ludinv/dlatm1.c
--- /dev/null
+++ b/examples/ludinv/dlatm1.c
@@ -0,0 +1,283 @@
+/* dlatm1.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/* Subroutine */ int dlatm1_(integer *mode, doublereal *cond, integer *irsign,
+ integer *idist, integer *iseed, doublereal *d__, integer *n, integer
+ *info)
+{
+ /* System generated locals */
+ integer i__1, i__2;
+ doublereal d__1;
+
+ /* Builtin functions */
+ double pow_dd(doublereal *, doublereal *), pow_di(doublereal *, integer *)
+ , log(doublereal), exp(doublereal);
+
+ /* Local variables */
+ integer i__;
+ doublereal temp, alpha;
+ extern doublereal dlaran_(integer *);
+ extern /* Subroutine */ int xerbla_(char *, integer *), dlarnv_(
+ integer *, integer *, integer *, doublereal *);
+
+
+/* -- LAPACK auxiliary test routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DLATM1 computes the entries of D(1..N) as specified by */
+/* MODE, COND and IRSIGN. IDIST and ISEED determine the generation */
+/* of random numbers. DLATM1 is called by SLATMR to generate */
+/* random test matrices for LAPACK programs. */
+
+/* Arguments */
+/* ========= */
+
+/* MODE - INTEGER */
+/* On entry describes how D is to be computed: */
+/* MODE = 0 means do not change D. */
+/* MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND */
+/* MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND */
+/* MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) */
+/* MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) */
+/* MODE = 5 sets D to random numbers in the range */
+/* ( 1/COND , 1 ) such that their logarithms */
+/* are uniformly distributed. */
+/* MODE = 6 set D to random numbers from same distribution */
+/* as the rest of the matrix. */
+/* MODE < 0 has the same meaning as ABS(MODE), except that */
+/* the order of the elements of D is reversed. */
+/* Thus if MODE is positive, D has entries ranging from */
+/* 1 to 1/COND, if negative, from 1/COND to 1, */
+/* Not modified. */
+
+/* COND - DOUBLE PRECISION */
+/* On entry, used as described under MODE above. */
+/* If used, it must be >= 1. Not modified. */
+
+/* IRSIGN - INTEGER */
+/* On entry, if MODE neither -6, 0 nor 6, determines sign of */
+/* entries of D */
+/* 0 => leave entries of D unchanged */
+/* 1 => multiply each entry of D by 1 or -1 with probability .5 */
+
+/* IDIST - CHARACTER*1 */
+/* On entry, IDIST specifies the type of distribution to be */
+/* used to generate a random matrix . */
+/* 1 => UNIFORM( 0, 1 ) */
+/* 2 => UNIFORM( -1, 1 ) */
+/* 3 => NORMAL( 0, 1 ) */
+/* Not modified. */
+
+/* ISEED - INTEGER array, dimension ( 4 ) */
+/* On entry ISEED specifies the seed of the random number */
+/* generator. The random number generator uses a */
+/* linear congruential sequence limited to small */
+/* integers, and so should produce machine independent */
+/* random numbers. The values of ISEED are changed on */
+/* exit, and can be used in the next call to DLATM1 */
+/* to continue the same random number sequence. */
+/* Changed on exit. */
+
+/* D - DOUBLE PRECISION array, dimension ( MIN( M , N ) ) */
+/* Array to be computed according to MODE, COND and IRSIGN. */
+/* May be changed on exit if MODE is nonzero. */
+
+/* N - INTEGER */
+/* Number of entries of D. Not modified. */
+
+/* INFO - INTEGER */
+/* 0 => normal termination */
+/* -1 => if MODE not in range -6 to 6 */
+/* -2 => if MODE neither -6, 0 nor 6, and */
+/* IRSIGN neither 0 nor 1 */
+/* -3 => if MODE neither -6, 0 nor 6 and COND less than 1 */
+/* -4 => if MODE equals 6 or -6 and IDIST not in range 1 to 3 */
+/* -7 => if N negative */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Executable Statements .. */
+
+/* Decode and Test the input parameters. Initialize flags & seed. */
+
+ /* Parameter adjustments */
+ --d__;
+ --iseed;
+
+ /* Function Body */
+ *info = 0;
+
+/* Quick return if possible */
+
+ if (*n == 0) {
+ return 0;
+ }
+
+/* Set INFO if an error */
+
+ if (*mode < -6 || *mode > 6) {
+ *info = -1;
+ } else if (*mode != -6 && *mode != 0 && *mode != 6 && (*irsign != 0 && *
+ irsign != 1)) {
+ *info = -2;
+ } else if (*mode != -6 && *mode != 0 && *mode != 6 && *cond < 1.) {
+ *info = -3;
+ } else if ((*mode == 6 || *mode == -6) && (*idist < 1 || *idist > 3)) {
+ *info = -4;
+ } else if (*n < 0) {
+ *info = -7;
+ }
+
+ if (*info != 0) {
+ i__1 = -(*info);
+ xerbla_("DLATM1", &i__1);
+ return 0;
+ }
+
+/* Compute D according to COND and MODE */
+
+ if (*mode != 0) {
+ switch (abs(*mode)) {
+ case 1: goto L10;
+ case 2: goto L30;
+ case 3: goto L50;
+ case 4: goto L70;
+ case 5: goto L90;
+ case 6: goto L110;
+ }
+
+/* One large D value: */
+
+L10:
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ d__[i__] = 1. / *cond;
+/* L20: */
+ }
+ d__[1] = 1.;
+ goto L120;
+
+/* One small D value: */
+
+L30:
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ d__[i__] = 1.;
+/* L40: */
+ }
+ d__[*n] = 1. / *cond;
+ goto L120;
+
+/* Exponentially distributed D values: */
+
+L50:
+ d__[1] = 1.;
+ if (*n > 1) {
+ d__1 = -1. / (doublereal) (*n - 1);
+ alpha = pow_dd(cond, &d__1);
+ i__1 = *n;
+ for (i__ = 2; i__ <= i__1; ++i__) {
+ i__2 = i__ - 1;
+ d__[i__] = pow_di(&alpha, &i__2);
+/* L60: */
+ }
+ }
+ goto L120;
+
+/* Arithmetically distributed D values: */
+
+L70:
+ d__[1] = 1.;
+ if (*n > 1) {
+ temp = 1. / *cond;
+ alpha = (1. - temp) / (doublereal) (*n - 1);
+ i__1 = *n;
+ for (i__ = 2; i__ <= i__1; ++i__) {
+ d__[i__] = (doublereal) (*n - i__) * alpha + temp;
+/* L80: */
+ }
+ }
+ goto L120;
+
+/* Randomly distributed D values on ( 1/COND , 1): */
+
+L90:
+ alpha = log(1. / *cond);
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ d__[i__] = exp(alpha * dlaran_(&iseed[1]));
+/* L100: */
+ }
+ goto L120;
+
+/* Randomly distributed D values from IDIST */
+
+L110:
+ dlarnv_(idist, &iseed[1], n, &d__[1]);
+
+L120:
+
+/* If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign */
+/* random signs to D */
+
+ if (*mode != -6 && *mode != 0 && *mode != 6 && *irsign == 1) {
+ i__1 = *n;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ temp = dlaran_(&iseed[1]);
+ if (temp > .5) {
+ d__[i__] = -d__[i__];
+ }
+/* L130: */
+ }
+ }
+
+/* Reverse if MODE < 0 */
+
+ if (*mode < 0) {
+ i__1 = *n / 2;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ temp = d__[i__];
+ d__[i__] = d__[*n + 1 - i__];
+ d__[*n + 1 - i__] = temp;
+/* L140: */
+ }
+ }
+
+ }
+
+ return 0;
+
+/* End of DLATM1 */
+
+} /* dlatm1_ */
diff --git a/examples/ludinv/dlatm2.c b/examples/ludinv/dlatm2.c
--- /dev/null
+++ b/examples/ludinv/dlatm2.c
@@ -0,0 +1,251 @@
+/* dlatm2.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+doublereal dlatm2_(integer *m, integer *n, integer *i__, integer *j, integer *
+ kl, integer *ku, integer *idist, integer *iseed, doublereal *d__,
+ integer *igrade, doublereal *dl, doublereal *dr, integer *ipvtng,
+ integer *iwork, doublereal *sparse)
+{
+ /* System generated locals */
+ doublereal ret_val;
+
+ /* Local variables */
+ integer isub, jsub;
+ doublereal temp;
+ extern doublereal dlaran_(integer *), dlarnd_(integer *, integer *);
+
+
+/* -- LAPACK auxiliary test routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Scalar Arguments .. */
+
+/* .. */
+
+/* .. Array Arguments .. */
+
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DLATM2 returns the (I,J) entry of a random matrix of dimension */
+/* (M, N) described by the other paramters. It is called by the */
+/* DLATMR routine in order to build random test matrices. No error */
+/* checking on parameters is done, because this routine is called in */
+/* a tight loop by DLATMR which has already checked the parameters. */
+
+/* Use of DLATM2 differs from SLATM3 in the order in which the random */
+/* number generator is called to fill in random matrix entries. */
+/* With DLATM2, the generator is called to fill in the pivoted matrix */
+/* columnwise. With DLATM3, the generator is called to fill in the */
+/* matrix columnwise, after which it is pivoted. Thus, DLATM3 can */
+/* be used to construct random matrices which differ only in their */
+/* order of rows and/or columns. DLATM2 is used to construct band */
+/* matrices while avoiding calling the random number generator for */
+/* entries outside the band (and therefore generating random numbers */
+
+/* The matrix whose (I,J) entry is returned is constructed as */
+/* follows (this routine only computes one entry): */
+
+/* If I is outside (1..M) or J is outside (1..N), return zero */
+/* (this is convenient for generating matrices in band format). */
+
+/* Generate a matrix A with random entries of distribution IDIST. */
+
+/* Set the diagonal to D. */
+
+/* Grade the matrix, if desired, from the left (by DL) and/or */
+/* from the right (by DR or DL) as specified by IGRADE. */
+
+/* Permute, if desired, the rows and/or columns as specified by */
+/* IPVTNG and IWORK. */
+
+/* Band the matrix to have lower bandwidth KL and upper */
+/* bandwidth KU. */
+
+/* Set random entries to zero as specified by SPARSE. */
+
+/* Arguments */
+/* ========= */
+
+/* M - INTEGER */
+/* Number of rows of matrix. Not modified. */
+
+/* N - INTEGER */
+/* Number of columns of matrix. Not modified. */
+
+/* I - INTEGER */
+/* Row of entry to be returned. Not modified. */
+
+/* J - INTEGER */
+/* Column of entry to be returned. Not modified. */
+
+/* KL - INTEGER */
+/* Lower bandwidth. Not modified. */
+
+/* KU - INTEGER */
+/* Upper bandwidth. Not modified. */
+
+/* IDIST - INTEGER */
+/* On entry, IDIST specifies the type of distribution to be */
+/* used to generate a random matrix . */
+/* 1 => UNIFORM( 0, 1 ) */
+/* 2 => UNIFORM( -1, 1 ) */
+/* 3 => NORMAL( 0, 1 ) */
+/* Not modified. */
+
+/* ISEED - INTEGER array of dimension ( 4 ) */
+/* Seed for random number generator. */
+/* Changed on exit. */
+
+/* D - DOUBLE PRECISION array of dimension ( MIN( I , J ) ) */
+/* Diagonal entries of matrix. Not modified. */
+
+/* IGRADE - INTEGER */
+/* Specifies grading of matrix as follows: */
+/* 0 => no grading */
+/* 1 => matrix premultiplied by diag( DL ) */
+/* 2 => matrix postmultiplied by diag( DR ) */
+/* 3 => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by diag( DR ) */
+/* 4 => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by inv( diag( DL ) ) */
+/* 5 => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by diag( DL ) */
+/* Not modified. */
+
+/* DL - DOUBLE PRECISION array ( I or J, as appropriate ) */
+/* Left scale factors for grading matrix. Not modified. */
+
+/* DR - DOUBLE PRECISION array ( I or J, as appropriate ) */
+/* Right scale factors for grading matrix. Not modified. */
+
+/* IPVTNG - INTEGER */
+/* On entry specifies pivoting permutations as follows: */
+/* 0 => none. */
+/* 1 => row pivoting. */
+/* 2 => column pivoting. */
+/* 3 => full pivoting, i.e., on both sides. */
+/* Not modified. */
+
+/* IWORK - INTEGER array ( I or J, as appropriate ) */
+/* This array specifies the permutation used. The */
+/* row (or column) in position K was originally in */
+/* position IWORK( K ). */
+/* This differs from IWORK for DLATM3. Not modified. */
+
+/* SPARSE - DOUBLE PRECISION between 0. and 1. */
+/* On entry specifies the sparsity of the matrix */
+/* if sparse matix is to be generated. */
+/* SPARSE should lie between 0 and 1. */
+/* A uniform ( 0, 1 ) random number x is generated and */
+/* compared to SPARSE; if x is larger the matrix entry */
+/* is unchanged and if x is smaller the entry is set */
+/* to zero. Thus on the average a fraction SPARSE of the */
+/* entries will be set to zero. */
+/* Not modified. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+
+/* .. */
+
+/* .. Local Scalars .. */
+
+/* .. */
+
+/* .. External Functions .. */
+
+/* .. */
+
+/* ----------------------------------------------------------------------- */
+
+/* .. Executable Statements .. */
+
+
+/* Check for I and J in range */
+
+ /* Parameter adjustments */
+ --iwork;
+ --dr;
+ --dl;
+ --d__;
+ --iseed;
+
+ /* Function Body */
+ if (*i__ < 1 || *i__ > *m || *j < 1 || *j > *n) {
+ ret_val = 0.;
+ return ret_val;
+ }
+
+/* Check for banding */
+
+ if (*j > *i__ + *ku || *j < *i__ - *kl) {
+ ret_val = 0.;
+ return ret_val;
+ }
+
+/* Check for sparsity */
+
+ if (*sparse > 0.) {
+ if (dlaran_(&iseed[1]) < *sparse) {
+ ret_val = 0.;
+ return ret_val;
+ }
+ }
+
+/* Compute subscripts depending on IPVTNG */
+
+ if (*ipvtng == 0) {
+ isub = *i__;
+ jsub = *j;
+ } else if (*ipvtng == 1) {
+ isub = iwork[*i__];
+ jsub = *j;
+ } else if (*ipvtng == 2) {
+ isub = *i__;
+ jsub = iwork[*j];
+ } else if (*ipvtng == 3) {
+ isub = iwork[*i__];
+ jsub = iwork[*j];
+ }
+
+/* Compute entry and grade it according to IGRADE */
+
+ if (isub == jsub) {
+ temp = d__[isub];
+ } else {
+ temp = dlarnd_(idist, &iseed[1]);
+ }
+ if (*igrade == 1) {
+ temp *= dl[isub];
+ } else if (*igrade == 2) {
+ temp *= dr[jsub];
+ } else if (*igrade == 3) {
+ temp = temp * dl[isub] * dr[jsub];
+ } else if (*igrade == 4 && isub != jsub) {
+ temp = temp * dl[isub] / dl[jsub];
+ } else if (*igrade == 5) {
+ temp = temp * dl[isub] * dl[jsub];
+ }
+ ret_val = temp;
+ return ret_val;
+
+/* End of DLATM2 */
+
+} /* dlatm2_ */
diff --git a/examples/ludinv/dlatm3.c b/examples/ludinv/dlatm3.c
--- /dev/null
+++ b/examples/ludinv/dlatm3.c
@@ -0,0 +1,261 @@
+/* dlatm3.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+doublereal dlatm3_(integer *m, integer *n, integer *i__, integer *j, integer *
+ isub, integer *jsub, integer *kl, integer *ku, integer *idist,
+ integer *iseed, doublereal *d__, integer *igrade, doublereal *dl,
+ doublereal *dr, integer *ipvtng, integer *iwork, doublereal *sparse)
+{
+ /* System generated locals */
+ doublereal ret_val;
+
+ /* Local variables */
+ doublereal temp;
+ extern doublereal dlaran_(integer *), dlarnd_(integer *, integer *);
+
+
+/* -- LAPACK auxiliary test routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Scalar Arguments .. */
+
+/* .. */
+
+/* .. Array Arguments .. */
+
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DLATM3 returns the (ISUB,JSUB) entry of a random matrix of */
+/* dimension (M, N) described by the other paramters. (ISUB,JSUB) */
+/* is the final position of the (I,J) entry after pivoting */
+/* according to IPVTNG and IWORK. DLATM3 is called by the */
+/* DLATMR routine in order to build random test matrices. No error */
+/* checking on parameters is done, because this routine is called in */
+/* a tight loop by DLATMR which has already checked the parameters. */
+
+/* Use of DLATM3 differs from SLATM2 in the order in which the random */
+/* number generator is called to fill in random matrix entries. */
+/* With DLATM2, the generator is called to fill in the pivoted matrix */
+/* columnwise. With DLATM3, the generator is called to fill in the */
+/* matrix columnwise, after which it is pivoted. Thus, DLATM3 can */
+/* be used to construct random matrices which differ only in their */
+/* order of rows and/or columns. DLATM2 is used to construct band */
+/* matrices while avoiding calling the random number generator for */
+/* entries outside the band (and therefore generating random numbers */
+/* in different orders for different pivot orders). */
+
+/* The matrix whose (ISUB,JSUB) entry is returned is constructed as */
+/* follows (this routine only computes one entry): */
+
+/* If ISUB is outside (1..M) or JSUB is outside (1..N), return zero */
+/* (this is convenient for generating matrices in band format). */
+
+/* Generate a matrix A with random entries of distribution IDIST. */
+
+/* Set the diagonal to D. */
+
+/* Grade the matrix, if desired, from the left (by DL) and/or */
+/* from the right (by DR or DL) as specified by IGRADE. */
+
+/* Permute, if desired, the rows and/or columns as specified by */
+/* IPVTNG and IWORK. */
+
+/* Band the matrix to have lower bandwidth KL and upper */
+/* bandwidth KU. */
+
+/* Set random entries to zero as specified by SPARSE. */
+
+/* Arguments */
+/* ========= */
+
+/* M - INTEGER */
+/* Number of rows of matrix. Not modified. */
+
+/* N - INTEGER */
+/* Number of columns of matrix. Not modified. */
+
+/* I - INTEGER */
+/* Row of unpivoted entry to be returned. Not modified. */
+
+/* J - INTEGER */
+/* Column of unpivoted entry to be returned. Not modified. */
+
+/* ISUB - INTEGER */
+/* Row of pivoted entry to be returned. Changed on exit. */
+
+/* JSUB - INTEGER */
+/* Column of pivoted entry to be returned. Changed on exit. */
+
+/* KL - INTEGER */
+/* Lower bandwidth. Not modified. */
+
+/* KU - INTEGER */
+/* Upper bandwidth. Not modified. */
+
+/* IDIST - INTEGER */
+/* On entry, IDIST specifies the type of distribution to be */
+/* used to generate a random matrix . */
+/* 1 => UNIFORM( 0, 1 ) */
+/* 2 => UNIFORM( -1, 1 ) */
+/* 3 => NORMAL( 0, 1 ) */
+/* Not modified. */
+
+/* ISEED - INTEGER array of dimension ( 4 ) */
+/* Seed for random number generator. */
+/* Changed on exit. */
+
+/* D - DOUBLE PRECISION array of dimension ( MIN( I , J ) ) */
+/* Diagonal entries of matrix. Not modified. */
+
+/* IGRADE - INTEGER */
+/* Specifies grading of matrix as follows: */
+/* 0 => no grading */
+/* 1 => matrix premultiplied by diag( DL ) */
+/* 2 => matrix postmultiplied by diag( DR ) */
+/* 3 => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by diag( DR ) */
+/* 4 => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by inv( diag( DL ) ) */
+/* 5 => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by diag( DL ) */
+/* Not modified. */
+
+/* DL - DOUBLE PRECISION array ( I or J, as appropriate ) */
+/* Left scale factors for grading matrix. Not modified. */
+
+/* DR - DOUBLE PRECISION array ( I or J, as appropriate ) */
+/* Right scale factors for grading matrix. Not modified. */
+
+/* IPVTNG - INTEGER */
+/* On entry specifies pivoting permutations as follows: */
+/* 0 => none. */
+/* 1 => row pivoting. */
+/* 2 => column pivoting. */
+/* 3 => full pivoting, i.e., on both sides. */
+/* Not modified. */
+
+/* IWORK - INTEGER array ( I or J, as appropriate ) */
+/* This array specifies the permutation used. The */
+/* row (or column) originally in position K is in */
+/* position IWORK( K ) after pivoting. */
+/* This differs from IWORK for DLATM2. Not modified. */
+
+/* SPARSE - DOUBLE PRECISION between 0. and 1. */
+/* On entry specifies the sparsity of the matrix */
+/* if sparse matix is to be generated. */
+/* SPARSE should lie between 0 and 1. */
+/* A uniform ( 0, 1 ) random number x is generated and */
+/* compared to SPARSE; if x is larger the matrix entry */
+/* is unchanged and if x is smaller the entry is set */
+/* to zero. Thus on the average a fraction SPARSE of the */
+/* entries will be set to zero. */
+/* Not modified. */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+
+/* .. */
+
+/* .. Local Scalars .. */
+
+/* .. */
+
+/* .. External Functions .. */
+
+/* .. */
+
+/* ----------------------------------------------------------------------- */
+
+/* .. Executable Statements .. */
+
+
+/* Check for I and J in range */
+
+ /* Parameter adjustments */
+ --iwork;
+ --dr;
+ --dl;
+ --d__;
+ --iseed;
+
+ /* Function Body */
+ if (*i__ < 1 || *i__ > *m || *j < 1 || *j > *n) {
+ *isub = *i__;
+ *jsub = *j;
+ ret_val = 0.;
+ return ret_val;
+ }
+
+/* Compute subscripts depending on IPVTNG */
+
+ if (*ipvtng == 0) {
+ *isub = *i__;
+ *jsub = *j;
+ } else if (*ipvtng == 1) {
+ *isub = iwork[*i__];
+ *jsub = *j;
+ } else if (*ipvtng == 2) {
+ *isub = *i__;
+ *jsub = iwork[*j];
+ } else if (*ipvtng == 3) {
+ *isub = iwork[*i__];
+ *jsub = iwork[*j];
+ }
+
+/* Check for banding */
+
+ if (*jsub > *isub + *ku || *jsub < *isub - *kl) {
+ ret_val = 0.;
+ return ret_val;
+ }
+
+/* Check for sparsity */
+
+ if (*sparse > 0.) {
+ if (dlaran_(&iseed[1]) < *sparse) {
+ ret_val = 0.;
+ return ret_val;
+ }
+ }
+
+/* Compute entry and grade it according to IGRADE */
+
+ if (*i__ == *j) {
+ temp = d__[*i__];
+ } else {
+ temp = dlarnd_(idist, &iseed[1]);
+ }
+ if (*igrade == 1) {
+ temp *= dl[*i__];
+ } else if (*igrade == 2) {
+ temp *= dr[*j];
+ } else if (*igrade == 3) {
+ temp = temp * dl[*i__] * dr[*j];
+ } else if (*igrade == 4 && *i__ != *j) {
+ temp = temp * dl[*i__] / dl[*j];
+ } else if (*igrade == 5) {
+ temp = temp * dl[*i__] * dl[*j];
+ }
+ ret_val = temp;
+ return ret_val;
+
+/* End of DLATM3 */
+
+} /* dlatm3_ */
diff --git a/examples/ludinv/dlatmr.c b/examples/ludinv/dlatmr.c
--- /dev/null
+++ b/examples/ludinv/dlatmr.c
@@ -0,0 +1,1288 @@
+/* dlatmr.f -- translated by f2c (version 20061008).
+ You must link the resulting object file with libf2c:
+ on Microsoft Windows system, link with libf2c.lib;
+ on Linux or Unix systems, link with .../path/to/libf2c.a -lm
+ or, if you install libf2c.a in a standard place, with -lf2c -lm
+ -- in that order, at the end of the command line, as in
+ cc *.o -lf2c -lm
+ Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
+
+ http://www.netlib.org/f2c/libf2c.zip
+*/
+
+#include "f2c.h"
+#include "blaswrap.h"
+
+/* Table of constant values */
+
+static integer c__0 = 0;
+static integer c__1 = 1;
+
+/* Subroutine */ int dlatmr_(integer *m, integer *n, char *dist, integer *
+ iseed, char *sym, doublereal *d__, integer *mode, doublereal *cond,
+ doublereal *dmax__, char *rsign, char *grade, doublereal *dl, integer
+ *model, doublereal *condl, doublereal *dr, integer *moder, doublereal
+ *condr, char *pivtng, integer *ipivot, integer *kl, integer *ku,
+ doublereal *sparse, doublereal *anorm, char *pack, doublereal *a,
+ integer *lda, integer *iwork, integer *info)
+{
+ /* System generated locals */
+ integer a_dim1, a_offset, i__1, i__2;
+ doublereal d__1, d__2, d__3;
+
+ /* Local variables */
+ integer i__, j, k, kll, kuu, isub, jsub;
+ doublereal temp;
+ integer isym;
+ doublereal alpha;
+ extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+ integer *);
+ integer ipack;
+ extern logical lsame_(char *, char *);
+ doublereal tempa[1];
+ integer iisub, idist, jjsub, mnmin;
+ logical dzero;
+ integer mnsub;
+ doublereal onorm;
+ integer mxsub, npvts;
+ extern /* Subroutine */ int dlatm1_(integer *, doublereal *, integer *,
+ integer *, integer *, doublereal *, integer *, integer *);
+ extern doublereal dlatm2_(integer *, integer *, integer *, integer *,
+ integer *, integer *, integer *, integer *, doublereal *, integer
+ *, doublereal *, doublereal *, integer *, integer *, doublereal *)
+ , dlatm3_(integer *, integer *, integer *, integer *, integer *,
+ integer *, integer *, integer *, integer *, integer *, doublereal
+ *, integer *, doublereal *, doublereal *, integer *, integer *,
+ doublereal *), dlangb_(char *, integer *, integer *, integer *,
+ doublereal *, integer *, doublereal *), dlange_(char *,
+ integer *, integer *, doublereal *, integer *, doublereal *);
+ integer igrade;
+ extern doublereal dlansb_(char *, char *, integer *, integer *,
+ doublereal *, integer *, doublereal *);
+ logical fulbnd;
+ extern /* Subroutine */ int xerbla_(char *, integer *);
+ logical badpvt;
+ extern doublereal dlansp_(char *, char *, integer *, doublereal *,
+ doublereal *), dlansy_(char *, char *, integer *,
+ doublereal *, integer *, doublereal *);
+ integer irsign, ipvtng;
+
+
+/* -- LAPACK test routine (version 3.1) -- */
+/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
+/* November 2006 */
+
+/* .. Scalar Arguments .. */
+/* .. */
+/* .. Array Arguments .. */
+/* .. */
+
+/* Purpose */
+/* ======= */
+
+/* DLATMR generates random matrices of various types for testing */
+/* LAPACK programs. */
+
+/* DLATMR operates by applying the following sequence of */
+/* operations: */
+
+/* Generate a matrix A with random entries of distribution DIST */
+/* which is symmetric if SYM='S', and nonsymmetric */
+/* if SYM='N'. */
+
+/* Set the diagonal to D, where D may be input or */
+/* computed according to MODE, COND, DMAX and RSIGN */
+/* as described below. */
+
+/* Grade the matrix, if desired, from the left and/or right */
+/* as specified by GRADE. The inputs DL, MODEL, CONDL, DR, */
+/* MODER and CONDR also determine the grading as described */
+/* below. */
+
+/* Permute, if desired, the rows and/or columns as specified by */
+/* PIVTNG and IPIVOT. */
+
+/* Set random entries to zero, if desired, to get a random sparse */
+/* matrix as specified by SPARSE. */
+
+/* Make A a band matrix, if desired, by zeroing out the matrix */
+/* outside a band of lower bandwidth KL and upper bandwidth KU. */
+
+/* Scale A, if desired, to have maximum entry ANORM. */
+
+/* Pack the matrix if desired. Options specified by PACK are: */
+/* no packing */
+/* zero out upper half (if symmetric) */
+/* zero out lower half (if symmetric) */
+/* store the upper half columnwise (if symmetric or */
+/* square upper triangular) */
+/* store the lower half columnwise (if symmetric or */
+/* square lower triangular) */
+/* same as upper half rowwise if symmetric */
+/* store the lower triangle in banded format (if symmetric) */
+/* store the upper triangle in banded format (if symmetric) */
+/* store the entire matrix in banded format */
+
+/* Note: If two calls to DLATMR differ only in the PACK parameter, */
+/* they will generate mathematically equivalent matrices. */
+
+/* If two calls to DLATMR both have full bandwidth (KL = M-1 */
+/* and KU = N-1), and differ only in the PIVTNG and PACK */
+/* parameters, then the matrices generated will differ only */
+/* in the order of the rows and/or columns, and otherwise */
+/* contain the same data. This consistency cannot be and */
+/* is not maintained with less than full bandwidth. */
+
+/* Arguments */
+/* ========= */
+
+/* M - INTEGER */
+/* Number of rows of A. Not modified. */
+
+/* N - INTEGER */
+/* Number of columns of A. Not modified. */
+
+/* DIST - CHARACTER*1 */
+/* On entry, DIST specifies the type of distribution to be used */
+/* to generate a random matrix . */
+/* 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) */
+/* 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) */
+/* 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) */
+/* Not modified. */
+
+/* ISEED - INTEGER array, dimension (4) */
+/* On entry ISEED specifies the seed of the random number */
+/* generator. They should lie between 0 and 4095 inclusive, */
+/* and ISEED(4) should be odd. The random number generator */
+/* uses a linear congruential sequence limited to small */
+/* integers, and so should produce machine independent */
+/* random numbers. The values of ISEED are changed on */
+/* exit, and can be used in the next call to DLATMR */
+/* to continue the same random number sequence. */
+/* Changed on exit. */
+
+/* SYM - CHARACTER*1 */
+/* If SYM='S' or 'H', generated matrix is symmetric. */
+/* If SYM='N', generated matrix is nonsymmetric. */
+/* Not modified. */
+
+/* D - DOUBLE PRECISION array, dimension (min(M,N)) */
+/* On entry this array specifies the diagonal entries */
+/* of the diagonal of A. D may either be specified */
+/* on entry, or set according to MODE and COND as described */
+/* below. May be changed on exit if MODE is nonzero. */
+
+/* MODE - INTEGER */
+/* On entry describes how D is to be used: */
+/* MODE = 0 means use D as input */
+/* MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND */
+/* MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND */
+/* MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) */
+/* MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) */
+/* MODE = 5 sets D to random numbers in the range */
+/* ( 1/COND , 1 ) such that their logarithms */
+/* are uniformly distributed. */
+/* MODE = 6 set D to random numbers from same distribution */
+/* as the rest of the matrix. */
+/* MODE < 0 has the same meaning as ABS(MODE), except that */
+/* the order of the elements of D is reversed. */
+/* Thus if MODE is positive, D has entries ranging from */
+/* 1 to 1/COND, if negative, from 1/COND to 1, */
+/* Not modified. */
+
+/* COND - DOUBLE PRECISION */
+/* On entry, used as described under MODE above. */
+/* If used, it must be >= 1. Not modified. */
+
+/* DMAX - DOUBLE PRECISION */
+/* If MODE neither -6, 0 nor 6, the diagonal is scaled by */
+/* DMAX / max(abs(D(i))), so that maximum absolute entry */
+/* of diagonal is abs(DMAX). If DMAX is negative (or zero), */
+/* diagonal will be scaled by a negative number (or zero). */
+
+/* RSIGN - CHARACTER*1 */
+/* If MODE neither -6, 0 nor 6, specifies sign of diagonal */
+/* as follows: */
+/* 'T' => diagonal entries are multiplied by 1 or -1 */
+/* with probability .5 */
+/* 'F' => diagonal unchanged */
+/* Not modified. */
+
+/* GRADE - CHARACTER*1 */
+/* Specifies grading of matrix as follows: */
+/* 'N' => no grading */
+/* 'L' => matrix premultiplied by diag( DL ) */
+/* (only if matrix nonsymmetric) */
+/* 'R' => matrix postmultiplied by diag( DR ) */
+/* (only if matrix nonsymmetric) */
+/* 'B' => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by diag( DR ) */
+/* (only if matrix nonsymmetric) */
+/* 'S' or 'H' => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by diag( DL ) */
+/* ('S' for symmetric, or 'H' for Hermitian) */
+/* 'E' => matrix premultiplied by diag( DL ) and */
+/* postmultiplied by inv( diag( DL ) ) */
+/* ( 'E' for eigenvalue invariance) */
+/* (only if matrix nonsymmetric) */
+/* Note: if GRADE='E', then M must equal N. */
+/* Not modified. */
+
+/* DL - DOUBLE PRECISION array, dimension (M) */
+/* If MODEL=0, then on entry this array specifies the diagonal */
+/* entries of a diagonal matrix used as described under GRADE */
+/* above. If MODEL is not zero, then DL will be set according */
+/* to MODEL and CONDL, analogous to the way D is set according */
+/* to MODE and COND (except there is no DMAX parameter for DL). */
+/* If GRADE='E', then DL cannot have zero entries. */
+/* Not referenced if GRADE = 'N' or 'R'. Changed on exit. */
+
+/* MODEL - INTEGER */
+/* This specifies how the diagonal array DL is to be computed, */
+/* just as MODE specifies how D is to be computed. */
+/* Not modified. */
+
+/* CONDL - DOUBLE PRECISION */
+/* When MODEL is not zero, this specifies the condition number */
+/* of the computed DL. Not modified. */
+
+/* DR - DOUBLE PRECISION array, dimension (N) */
+/* If MODER=0, then on entry this array specifies the diagonal */
+/* entries of a diagonal matrix used as described under GRADE */
+/* above. If MODER is not zero, then DR will be set according */
+/* to MODER and CONDR, analogous to the way D is set according */
+/* to MODE and COND (except there is no DMAX parameter for DR). */
+/* Not referenced if GRADE = 'N', 'L', 'H', 'S' or 'E'. */
+/* Changed on exit. */
+
+/* MODER - INTEGER */
+/* This specifies how the diagonal array DR is to be computed, */
+/* just as MODE specifies how D is to be computed. */
+/* Not modified. */
+
+/* CONDR - DOUBLE PRECISION */
+/* When MODER is not zero, this specifies the condition number */
+/* of the computed DR. Not modified. */
+
+/* PIVTNG - CHARACTER*1 */
+/* On entry specifies pivoting permutations as follows: */
+/* 'N' or ' ' => none. */
+/* 'L' => left or row pivoting (matrix must be nonsymmetric). */
+/* 'R' => right or column pivoting (matrix must be */
+/* nonsymmetric). */
+/* 'B' or 'F' => both or full pivoting, i.e., on both sides. */
+/* In this case, M must equal N */
+
+/* If two calls to DLATMR both have full bandwidth (KL = M-1 */
+/* and KU = N-1), and differ only in the PIVTNG and PACK */
+/* parameters, then the matrices generated will differ only */
+/* in the order of the rows and/or columns, and otherwise */
+/* contain the same data. This consistency cannot be */
+/* maintained with less than full bandwidth. */
+
+/* IPIVOT - INTEGER array, dimension (N or M) */
+/* This array specifies the permutation used. After the */
+/* basic matrix is generated, the rows, columns, or both */
+/* are permuted. If, say, row pivoting is selected, DLATMR */
+/* starts with the *last* row and interchanges the M-th and */
+/* IPIVOT(M)-th rows, then moves to the next-to-last row, */
+/* interchanging the (M-1)-th and the IPIVOT(M-1)-th rows, */
+/* and so on. In terms of "2-cycles", the permutation is */
+/* (1 IPIVOT(1)) (2 IPIVOT(2)) ... (M IPIVOT(M)) */
+/* where the rightmost cycle is applied first. This is the */
+/* *inverse* of the effect of pivoting in LINPACK. The idea */
+/* is that factoring (with pivoting) an identity matrix */
+/* which has been inverse-pivoted in this way should */
+/* result in a pivot vector identical to IPIVOT. */
+/* Not referenced if PIVTNG = 'N'. Not modified. */
+
+/* SPARSE - DOUBLE PRECISION */
+/* On entry specifies the sparsity of the matrix if a sparse */
+/* matrix is to be generated. SPARSE should lie between */
+/* 0 and 1. To generate a sparse matrix, for each matrix entry */
+/* a uniform ( 0, 1 ) random number x is generated and */
+/* compared to SPARSE; if x is larger the matrix entry */
+/* is unchanged and if x is smaller the entry is set */
+/* to zero. Thus on the average a fraction SPARSE of the */
+/* entries will be set to zero. */
+/* Not modified. */
+
+/* KL - INTEGER */
+/* On entry specifies the lower bandwidth of the matrix. For */
+/* example, KL=0 implies upper triangular, KL=1 implies upper */
+/* Hessenberg, and KL at least M-1 implies the matrix is not */
+/* banded. Must equal KU if matrix is symmetric. */
+/* Not modified. */
+
+/* KU - INTEGER */
+/* On entry specifies the upper bandwidth of the matrix. For */
+/* example, KU=0 implies lower triangular, KU=1 implies lower */
+/* Hessenberg, and KU at least N-1 implies the matrix is not */
+/* banded. Must equal KL if matrix is symmetric. */
+/* Not modified. */
+
+/* ANORM - DOUBLE PRECISION */
+/* On entry specifies maximum entry of output matrix */
+/* (output matrix will by multiplied by a constant so that */
+/* its largest absolute entry equal ANORM) */
+/* if ANORM is nonnegative. If ANORM is negative no scaling */
+/* is done. Not modified. */
+
+/* PACK - CHARACTER*1 */
+/* On entry specifies packing of matrix as follows: */
+/* 'N' => no packing */
+/* 'U' => zero out all subdiagonal entries (if symmetric) */
+/* 'L' => zero out all superdiagonal entries (if symmetric) */
+/* 'C' => store the upper triangle columnwise */
+/* (only if matrix symmetric or square upper triangular) */
+/* 'R' => store the lower triangle columnwise */
+/* (only if matrix symmetric or square lower triangular) */
+/* (same as upper half rowwise if symmetric) */
+/* 'B' => store the lower triangle in band storage scheme */
+/* (only if matrix symmetric) */
+/* 'Q' => store the upper triangle in band storage scheme */
+/* (only if matrix symmetric) */
+/* 'Z' => store the entire matrix in band storage scheme */
+/* (pivoting can be provided for by using this */
+/* option to store A in the trailing rows of */
+/* the allocated storage) */
+
+/* Using these options, the various LAPACK packed and banded */
+/* storage schemes can be obtained: */
+/* GB - use 'Z' */
+/* PB, SB or TB - use 'B' or 'Q' */
+/* PP, SP or TP - use 'C' or 'R' */
+
+/* If two calls to DLATMR differ only in the PACK parameter, */
+/* they will generate mathematically equivalent matrices. */
+/* Not modified. */
+
+/* A - DOUBLE PRECISION array, dimension (LDA,N) */
+/* On exit A is the desired test matrix. Only those */
+/* entries of A which are significant on output */
+/* will be referenced (even if A is in packed or band */
+/* storage format). The 'unoccupied corners' of A in */
+/* band format will be zeroed out. */
+
+/* LDA - INTEGER */
+/* on entry LDA specifies the first dimension of A as */
+/* declared in the calling program. */
+/* If PACK='N', 'U' or 'L', LDA must be at least max ( 1, M ). */
+/* If PACK='C' or 'R', LDA must be at least 1. */
+/* If PACK='B', or 'Q', LDA must be MIN ( KU+1, N ) */
+/* If PACK='Z', LDA must be at least KUU+KLL+1, where */
+/* KUU = MIN ( KU, N-1 ) and KLL = MIN ( KL, N-1 ) */
+/* Not modified. */
+
+/* IWORK - INTEGER array, dimension ( N or M) */
+/* Workspace. Not referenced if PIVTNG = 'N'. Changed on exit. */
+
+/* INFO - INTEGER */
+/* Error parameter on exit: */
+/* 0 => normal return */
+/* -1 => M negative or unequal to N and SYM='S' or 'H' */
+/* -2 => N negative */
+/* -3 => DIST illegal string */
+/* -5 => SYM illegal string */
+/* -7 => MODE not in range -6 to 6 */
+/* -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 */
+/* -10 => MODE neither -6, 0 nor 6 and RSIGN illegal string */
+/* -11 => GRADE illegal string, or GRADE='E' and */
+/* M not equal to N, or GRADE='L', 'R', 'B' or 'E' and */
+/* SYM = 'S' or 'H' */
+/* -12 => GRADE = 'E' and DL contains zero */
+/* -13 => MODEL not in range -6 to 6 and GRADE= 'L', 'B', 'H', */
+/* 'S' or 'E' */
+/* -14 => CONDL less than 1.0, GRADE='L', 'B', 'H', 'S' or 'E', */
+/* and MODEL neither -6, 0 nor 6 */
+/* -16 => MODER not in range -6 to 6 and GRADE= 'R' or 'B' */
+/* -17 => CONDR less than 1.0, GRADE='R' or 'B', and */
+/* MODER neither -6, 0 nor 6 */
+/* -18 => PIVTNG illegal string, or PIVTNG='B' or 'F' and */
+/* M not equal to N, or PIVTNG='L' or 'R' and SYM='S' */
+/* or 'H' */
+/* -19 => IPIVOT contains out of range number and */
+/* PIVTNG not equal to 'N' */
+/* -20 => KL negative */
+/* -21 => KU negative, or SYM='S' or 'H' and KU not equal to KL */
+/* -22 => SPARSE not in range 0. to 1. */
+/* -24 => PACK illegal string, or PACK='U', 'L', 'B' or 'Q' */
+/* and SYM='N', or PACK='C' and SYM='N' and either KL */
+/* not equal to 0 or N not equal to M, or PACK='R' and */
+/* SYM='N', and either KU not equal to 0 or N not equal */
+/* to M */
+/* -26 => LDA too small */
+/* 1 => Error return from DLATM1 (computing D) */
+/* 2 => Cannot scale diagonal to DMAX (max. entry is 0) */
+/* 3 => Error return from DLATM1 (computing DL) */
+/* 4 => Error return from DLATM1 (computing DR) */
+/* 5 => ANORM is positive, but matrix constructed prior to */
+/* attempting to scale it to have norm ANORM, is zero */
+
+/* ===================================================================== */
+
+/* .. Parameters .. */
+/* .. */
+/* .. Local Scalars .. */
+/* .. */
+/* .. Local Arrays .. */
+/* .. */
+/* .. External Functions .. */
+/* .. */
+/* .. External Subroutines .. */
+/* .. */
+/* .. Intrinsic Functions .. */
+/* .. */
+/* .. Executable Statements .. */
+
+/* 1) Decode and Test the input parameters. */
+/* Initialize flags & seed. */
+
+ /* Parameter adjustments */
+ --iseed;
+ --d__;
+ --dl;
+ --dr;
+ --ipivot;
+ a_dim1 = *lda;
+ a_offset = 1 + a_dim1;
+ a -= a_offset;
+ --iwork;
+
+ /* Function Body */
+ *info = 0;
+
+/* Quick return if possible */
+
+ if (*m == 0 || *n == 0) {
+ return 0;
+ }
+
+/* Decode DIST */
+
+ if (lsame_(dist, "U")) {
+ idist = 1;
+ } else if (lsame_(dist, "S")) {
+ idist = 2;
+ } else if (lsame_(dist, "N")) {
+ idist = 3;
+ } else {
+ idist = -1;
+ }
+
+/* Decode SYM */
+
+ if (lsame_(sym, "S")) {
+ isym = 0;
+ } else if (lsame_(sym, "N")) {
+ isym = 1;
+ } else if (lsame_(sym, "H")) {
+ isym = 0;
+ } else {
+ isym = -1;
+ }
+
+/* Decode RSIGN */
+
+ if (lsame_(rsign, "F")) {
+ irsign = 0;
+ } else if (lsame_(rsign, "T")) {
+ irsign = 1;
+ } else {
+ irsign = -1;
+ }
+
+/* Decode PIVTNG */
+
+ if (lsame_(pivtng, "N")) {
+ ipvtng = 0;
+ } else if (lsame_(pivtng, " ")) {
+ ipvtng = 0;
+ } else if (lsame_(pivtng, "L")) {
+ ipvtng = 1;
+ npvts = *m;
+ } else if (lsame_(pivtng, "R")) {
+ ipvtng = 2;
+ npvts = *n;
+ } else if (lsame_(pivtng, "B")) {
+ ipvtng = 3;
+ npvts = min(*n,*m);
+ } else if (lsame_(pivtng, "F")) {
+ ipvtng = 3;
+ npvts = min(*n,*m);
+ } else {
+ ipvtng = -1;
+ }
+
+/* Decode GRADE */
+
+ if (lsame_(grade, "N")) {
+ igrade = 0;
+ } else if (lsame_(grade, "L")) {
+ igrade = 1;
+ } else if (lsame_(grade, "R")) {
+ igrade = 2;
+ } else if (lsame_(grade, "B")) {
+ igrade = 3;
+ } else if (lsame_(grade, "E")) {
+ igrade = 4;
+ } else if (lsame_(grade, "H") || lsame_(grade,
+ "S")) {
+ igrade = 5;
+ } else {
+ igrade = -1;
+ }
+
+/* Decode PACK */
+
+ if (lsame_(pack, "N")) {
+ ipack = 0;
+ } else if (lsame_(pack, "U")) {
+ ipack = 1;
+ } else if (lsame_(pack, "L")) {
+ ipack = 2;
+ } else if (lsame_(pack, "C")) {
+ ipack = 3;
+ } else if (lsame_(pack, "R")) {
+ ipack = 4;
+ } else if (lsame_(pack, "B")) {
+ ipack = 5;
+ } else if (lsame_(pack, "Q")) {
+ ipack = 6;
+ } else if (lsame_(pack, "Z")) {
+ ipack = 7;
+ } else {
+ ipack = -1;
+ }
+
+/* Set certain internal parameters */
+
+ mnmin = min(*m,*n);
+/* Computing MIN */
+ i__1 = *kl, i__2 = *m - 1;
+ kll = min(i__1,i__2);
+/* Computing MIN */
+ i__1 = *ku, i__2 = *n - 1;
+ kuu = min(i__1,i__2);
+
+/* If inv(DL) is used, check to see if DL has a zero entry. */
+
+ dzero = FALSE_;
+ if (igrade == 4 && *model == 0) {
+ i__1 = *m;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ if (dl[i__] == 0.) {
+ dzero = TRUE_;
+ }
+/* L10: */
+ }
+ }
+
+/* Check values in IPIVOT */
+
+ badpvt = FALSE_;
+ if (ipvtng > 0) {
+ i__1 = npvts;
+ for (j = 1; j <= i__1; ++j) {
+ if (ipivot[j] <= 0 || ipivot[j] > npvts) {
+ badpvt = TRUE_;
+ }
+/* L20: */
+ }
+ }
+
+/* Set INFO if an error */
+
+ if (*m < 0) {
+ *info = -1;
+ } else if (*m != *n && isym == 0) {
+ *info = -1;
+ } else if (*n < 0) {
+ *info = -2;
+ } else if (idist == -1) {
+ *info = -3;
+ } else if (isym == -1) {
+ *info = -5;
+ } else if (*mode < -6 || *mode > 6) {
+ *info = -7;
+ } else if (*mode != -6 && *mode != 0 && *mode != 6 && *cond < 1.) {
+ *info = -8;
+ } else if (*mode != -6 && *mode != 0 && *mode != 6 && irsign == -1) {
+ *info = -10;
+ } else if (igrade == -1 || igrade == 4 && *m != *n || igrade >= 1 &&
+ igrade <= 4 && isym == 0) {
+ *info = -11;
+ } else if (igrade == 4 && dzero) {
+ *info = -12;
+ } else if ((igrade == 1 || igrade == 3 || igrade == 4 || igrade == 5) && (
+ *model < -6 || *model > 6)) {
+ *info = -13;
+ } else if ((igrade == 1 || igrade == 3 || igrade == 4 || igrade == 5) && (
+ *model != -6 && *model != 0 && *model != 6) && *condl < 1.) {
+ *info = -14;
+ } else if ((igrade == 2 || igrade == 3) && (*moder < -6 || *moder > 6)) {
+ *info = -16;
+ } else if ((igrade == 2 || igrade == 3) && (*moder != -6 && *moder != 0 &&
+ *moder != 6) && *condr < 1.) {
+ *info = -17;
+ } else if (ipvtng == -1 || ipvtng == 3 && *m != *n || (ipvtng == 1 ||
+ ipvtng == 2) && isym == 0) {
+ *info = -18;
+ } else if (ipvtng != 0 && badpvt) {
+ *info = -19;
+ } else if (*kl < 0) {
+ *info = -20;
+ } else if (*ku < 0 || isym == 0 && *kl != *ku) {
+ *info = -21;
+ } else if (*sparse < 0. || *sparse > 1.) {
+ *info = -22;
+ } else if (ipack == -1 || (ipack == 1 || ipack == 2 || ipack == 5 ||
+ ipack == 6) && isym == 1 || ipack == 3 && isym == 1 && (*kl != 0
+ || *m != *n) || ipack == 4 && isym == 1 && (*ku != 0 || *m != *n))
+ {
+ *info = -24;
+ } else if ((ipack == 0 || ipack == 1 || ipack == 2) && *lda < max(1,*m) ||
+ (ipack == 3 || ipack == 4) && *lda < 1 || (ipack == 5 || ipack ==
+ 6) && *lda < kuu + 1 || ipack == 7 && *lda < kll + kuu + 1) {
+ *info = -26;
+ }
+
+ if (*info != 0) {
+ i__1 = -(*info);
+ xerbla_("DLATMR", &i__1);
+ return 0;
+ }
+
+/* Decide if we can pivot consistently */
+
+ fulbnd = FALSE_;
+ if (kuu == *n - 1 && kll == *m - 1) {
+ fulbnd = TRUE_;
+ }
+
+/* Initialize random number generator */
+
+ for (i__ = 1; i__ <= 4; ++i__) {
+ iseed[i__] = (i__1 = iseed[i__], abs(i__1)) % 4096;
+/* L30: */
+ }
+
+ iseed[4] = (iseed[4] / 2 << 1) + 1;
+
+/* 2) Set up D, DL, and DR, if indicated. */
+
+/* Compute D according to COND and MODE */
+
+ dlatm1_(mode, cond, &irsign, &idist, &iseed[1], &d__[1], &mnmin, info);
+ if (*info != 0) {
+ *info = 1;
+ return 0;
+ }
+ if (*mode != 0 && *mode != -6 && *mode != 6) {
+
+/* Scale by DMAX */
+
+ temp = abs(d__[1]);
+ i__1 = mnmin;
+ for (i__ = 2; i__ <= i__1; ++i__) {
+/* Computing MAX */
+ d__2 = temp, d__3 = (d__1 = d__[i__], abs(d__1));
+ temp = max(d__2,d__3);
+/* L40: */
+ }
+ if (temp == 0. && *dmax__ != 0.) {
+ *info = 2;
+ return 0;
+ }
+ if (temp != 0.) {
+ alpha = *dmax__ / temp;
+ } else {
+ alpha = 1.;
+ }
+ i__1 = mnmin;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ d__[i__] = alpha * d__[i__];
+/* L50: */
+ }
+
+ }
+
+/* Compute DL if grading set */
+
+ if (igrade == 1 || igrade == 3 || igrade == 4 || igrade == 5) {
+ dlatm1_(model, condl, &c__0, &idist, &iseed[1], &dl[1], m, info);
+ if (*info != 0) {
+ *info = 3;
+ return 0;
+ }
+ }
+
+/* Compute DR if grading set */
+
+ if (igrade == 2 || igrade == 3) {
+ dlatm1_(moder, condr, &c__0, &idist, &iseed[1], &dr[1], n, info);
+ if (*info != 0) {
+ *info = 4;
+ return 0;
+ }
+ }
+
+/* 3) Generate IWORK if pivoting */
+
+ if (ipvtng > 0) {
+ i__1 = npvts;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ iwork[i__] = i__;
+/* L60: */
+ }
+ if (fulbnd) {
+ i__1 = npvts;
+ for (i__ = 1; i__ <= i__1; ++i__) {
+ k = ipivot[i__];
+ j = iwork[i__];
+ iwork[i__] = iwork[k];
+ iwork[k] = j;
+/* L70: */
+ }
+ } else {
+ for (i__ = npvts; i__ >= 1; --i__) {
+ k = ipivot[i__];
+ j = iwork[i__];
+ iwork[i__] = iwork[k];
+ iwork[k] = j;
+/* L80: */
+ }
+ }
+ }
+
+/* 4) Generate matrices for each kind of PACKing */
+/* Always sweep matrix columnwise (if symmetric, upper */
+/* half only) so that matrix generated does not depend */
+/* on PACK */
+
+ if (fulbnd) {
+
+/* Use DLATM3 so matrices generated with differing PIVOTing only */
+/* differ only in the order of their rows and/or columns. */
+
+ if (ipack == 0) {
+ if (isym == 0) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+ a[isub + jsub * a_dim1] = temp;
+ a[jsub + isub * a_dim1] = temp;
+/* L90: */
+ }
+/* L100: */
+ }
+ } else if (isym == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = *m;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+ a[isub + jsub * a_dim1] = temp;
+/* L110: */
+ }
+/* L120: */
+ }
+ }
+
+ } else if (ipack == 1) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ a[mnsub + mxsub * a_dim1] = temp;
+ if (mnsub != mxsub) {
+ a[mxsub + mnsub * a_dim1] = 0.;
+ }
+/* L130: */
+ }
+/* L140: */
+ }
+
+ } else if (ipack == 2) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ a[mxsub + mnsub * a_dim1] = temp;
+ if (mnsub != mxsub) {
+ a[mnsub + mxsub * a_dim1] = 0.;
+ }
+/* L150: */
+ }
+/* L160: */
+ }
+
+ } else if (ipack == 3) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+
+/* Compute K = location of (ISUB,JSUB) entry in packed */
+/* array */
+
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ k = mxsub * (mxsub - 1) / 2 + mnsub;
+
+/* Convert K to (IISUB,JJSUB) location */
+
+ jjsub = (k - 1) / *lda + 1;
+ iisub = k - *lda * (jjsub - 1);
+
+ a[iisub + jjsub * a_dim1] = temp;
+/* L170: */
+ }
+/* L180: */
+ }
+
+ } else if (ipack == 4) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+
+/* Compute K = location of (I,J) entry in packed array */
+
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ if (mnsub == 1) {
+ k = mxsub;
+ } else {
+ k = *n * (*n + 1) / 2 - (*n - mnsub + 1) * (*n -
+ mnsub + 2) / 2 + mxsub - mnsub + 1;
+ }
+
+/* Convert K to (IISUB,JJSUB) location */
+
+ jjsub = (k - 1) / *lda + 1;
+ iisub = k - *lda * (jjsub - 1);
+
+ a[iisub + jjsub * a_dim1] = temp;
+/* L190: */
+ }
+/* L200: */
+ }
+
+ } else if (ipack == 5) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ if (i__ < 1) {
+ a[j - i__ + 1 + (i__ + *n) * a_dim1] = 0.;
+ } else {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ a[mxsub - mnsub + 1 + mnsub * a_dim1] = temp;
+ }
+/* L210: */
+ }
+/* L220: */
+ }
+
+ } else if (ipack == 6) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ a[mnsub - mxsub + kuu + 1 + mxsub * a_dim1] = temp;
+/* L230: */
+ }
+/* L240: */
+ }
+
+ } else if (ipack == 7) {
+
+ if (isym == 0) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+ mnsub = min(isub,jsub);
+ mxsub = max(isub,jsub);
+ a[mnsub - mxsub + kuu + 1 + mxsub * a_dim1] = temp;
+ if (i__ < 1) {
+ a[j - i__ + 1 + kuu + (i__ + *n) * a_dim1] = 0.;
+ }
+ if (i__ >= 1 && mnsub != mxsub) {
+ a[mxsub - mnsub + 1 + kuu + mnsub * a_dim1] =
+ temp;
+ }
+/* L250: */
+ }
+/* L260: */
+ }
+ } else if (isym == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j + kll;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+ a[isub - jsub + kuu + 1 + jsub * a_dim1] = temp;
+/* L270: */
+ }
+/* L280: */
+ }
+ }
+
+ }
+
+ } else {
+
+/* Use DLATM2 */
+
+ if (ipack == 0) {
+ if (isym == 0) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ a[i__ + j * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku,
+ &idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+ a[j + i__ * a_dim1] = a[i__ + j * a_dim1];
+/* L290: */
+ }
+/* L300: */
+ }
+ } else if (isym == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = *m;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ a[i__ + j * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku,
+ &idist, &iseed[1], &d__[1], &igrade, &dl[1], &
+ dr[1], &ipvtng, &iwork[1], sparse);
+/* L310: */
+ }
+/* L320: */
+ }
+ }
+
+ } else if (ipack == 1) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ a[i__ + j * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+ if (i__ != j) {
+ a[j + i__ * a_dim1] = 0.;
+ }
+/* L330: */
+ }
+/* L340: */
+ }
+
+ } else if (ipack == 2) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ a[j + i__ * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, &
+ idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1]
+, &ipvtng, &iwork[1], sparse);
+ if (i__ != j) {
+ a[i__ + j * a_dim1] = 0.;
+ }
+/* L350: */
+ }
+/* L360: */
+ }
+
+ } else if (ipack == 3) {
+
+ isub = 0;
+ jsub = 1;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+ ++isub;
+ if (isub > *lda) {
+ isub = 1;
+ ++jsub;
+ }
+ a[isub + jsub * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku,
+ &idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[
+ 1], &ipvtng, &iwork[1], sparse);
+/* L370: */
+ }
+/* L380: */
+ }
+
+ } else if (ipack == 4) {
+
+ if (isym == 0) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = 1; i__ <= i__2; ++i__) {
+
+/* Compute K = location of (I,J) entry in packed array */
+
+ if (i__ == 1) {
+ k = j;
+ } else {
+ k = *n * (*n + 1) / 2 - (*n - i__ + 1) * (*n -
+ i__ + 2) / 2 + j - i__ + 1;
+ }
+
+/* Convert K to (ISUB,JSUB) location */
+
+ jsub = (k - 1) / *lda + 1;
+ isub = k - *lda * (jsub - 1);
+
+ a[isub + jsub * a_dim1] = dlatm2_(m, n, &i__, &j, kl,
+ ku, &idist, &iseed[1], &d__[1], &igrade, &dl[
+ 1], &dr[1], &ipvtng, &iwork[1], sparse);
+/* L390: */
+ }
+/* L400: */
+ }
+ } else {
+ isub = 0;
+ jsub = 1;
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = *m;
+ for (i__ = j; i__ <= i__2; ++i__) {
+ ++isub;
+ if (isub > *lda) {
+ isub = 1;
+ ++jsub;
+ }
+ a[isub + jsub * a_dim1] = dlatm2_(m, n, &i__, &j, kl,
+ ku, &idist, &iseed[1], &d__[1], &igrade, &dl[
+ 1], &dr[1], &ipvtng, &iwork[1], sparse);
+/* L410: */
+ }
+/* L420: */
+ }
+ }
+
+ } else if (ipack == 5) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ if (i__ < 1) {
+ a[j - i__ + 1 + (i__ + *n) * a_dim1] = 0.;
+ } else {
+ a[j - i__ + 1 + i__ * a_dim1] = dlatm2_(m, n, &i__, &
+ j, kl, ku, &idist, &iseed[1], &d__[1], &
+ igrade, &dl[1], &dr[1], &ipvtng, &iwork[1],
+ sparse);
+ }
+/* L430: */
+ }
+/* L440: */
+ }
+
+ } else if (ipack == 6) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ a[i__ - j + kuu + 1 + j * a_dim1] = dlatm2_(m, n, &i__, &
+ j, kl, ku, &idist, &iseed[1], &d__[1], &igrade, &
+ dl[1], &dr[1], &ipvtng, &iwork[1], sparse);
+/* L450: */
+ }
+/* L460: */
+ }
+
+ } else if (ipack == 7) {
+
+ if (isym == 0) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ a[i__ - j + kuu + 1 + j * a_dim1] = dlatm2_(m, n, &
+ i__, &j, kl, ku, &idist, &iseed[1], &d__[1], &
+ igrade, &dl[1], &dr[1], &ipvtng, &iwork[1],
+ sparse);
+ if (i__ < 1) {
+ a[j - i__ + 1 + kuu + (i__ + *n) * a_dim1] = 0.;
+ }
+ if (i__ >= 1 && i__ != j) {
+ a[j - i__ + 1 + kuu + i__ * a_dim1] = a[i__ - j +
+ kuu + 1 + j * a_dim1];
+ }
+/* L470: */
+ }
+/* L480: */
+ }
+ } else if (isym == 1) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = j + kll;
+ for (i__ = j - kuu; i__ <= i__2; ++i__) {
+ a[i__ - j + kuu + 1 + j * a_dim1] = dlatm2_(m, n, &
+ i__, &j, kl, ku, &idist, &iseed[1], &d__[1], &
+ igrade, &dl[1], &dr[1], &ipvtng, &iwork[1],
+ sparse);
+/* L490: */
+ }
+/* L500: */
+ }
+ }
+
+ }
+
+ }
+
+/* 5) Scaling the norm */
+
+ if (ipack == 0) {
+ onorm = dlange_("M", m, n, &a[a_offset], lda, tempa);
+ } else if (ipack == 1) {
+ onorm = dlansy_("M", "U", n, &a[a_offset], lda, tempa);
+ } else if (ipack == 2) {
+ onorm = dlansy_("M", "L", n, &a[a_offset], lda, tempa);
+ } else if (ipack == 3) {
+ onorm = dlansp_("M", "U", n, &a[a_offset], tempa);
+ } else if (ipack == 4) {
+ onorm = dlansp_("M", "L", n, &a[a_offset], tempa);
+ } else if (ipack == 5) {
+ onorm = dlansb_("M", "L", n, &kll, &a[a_offset], lda, tempa);
+ } else if (ipack == 6) {
+ onorm = dlansb_("M", "U", n, &kuu, &a[a_offset], lda, tempa);
+ } else if (ipack == 7) {
+ onorm = dlangb_("M", n, &kll, &kuu, &a[a_offset], lda, tempa);
+ }
+
+ if (*anorm >= 0.) {
+
+ if (*anorm > 0. && onorm == 0.) {
+
+/* Desired scaling impossible */
+
+ *info = 5;
+ return 0;
+
+ } else if (*anorm > 1. && onorm < 1. || *anorm < 1. && onorm > 1.) {
+
+/* Scale carefully to avoid over / underflow */
+
+ if (ipack <= 2) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ d__1 = 1. / onorm;
+ dscal_(m, &d__1, &a[j * a_dim1 + 1], &c__1);
+ dscal_(m, anorm, &a[j * a_dim1 + 1], &c__1);
+/* L510: */
+ }
+
+ } else if (ipack == 3 || ipack == 4) {
+
+ i__1 = *n * (*n + 1) / 2;
+ d__1 = 1. / onorm;
+ dscal_(&i__1, &d__1, &a[a_offset], &c__1);
+ i__1 = *n * (*n + 1) / 2;
+ dscal_(&i__1, anorm, &a[a_offset], &c__1);
+
+ } else if (ipack >= 5) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = kll + kuu + 1;
+ d__1 = 1. / onorm;
+ dscal_(&i__2, &d__1, &a[j * a_dim1 + 1], &c__1);
+ i__2 = kll + kuu + 1;
+ dscal_(&i__2, anorm, &a[j * a_dim1 + 1], &c__1);
+/* L520: */
+ }
+
+ }
+
+ } else {
+
+/* Scale straightforwardly */
+
+ if (ipack <= 2) {
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ d__1 = *anorm / onorm;
+ dscal_(m, &d__1, &a[j * a_dim1 + 1], &c__1);
+/* L530: */
+ }
+
+ } else if (ipack == 3 || ipack == 4) {
+
+ i__1 = *n * (*n + 1) / 2;
+ d__1 = *anorm / onorm;
+ dscal_(&i__1, &d__1, &a[a_offset], &c__1);
+
+ } else if (ipack >= 5) {
+
+ i__1 = *n;
+ for (j = 1; j <= i__1; ++j) {
+ i__2 = kll + kuu + 1;
+ d__1 = *anorm / onorm;
+ dscal_(&i__2, &d__1, &a[j * a_dim1 + 1], &c__1);
+/* L540: */
+ }
+ }
+
+ }
+
+ }
+
+/* End of DLATMR */
+
+ return 0;
+} /* dlatmr_ */
diff --git a/examples/ludinv/main.c b/examples/ludinv/main.c
--- /dev/null
+++ b/examples/ludinv/main.c
@@ -0,0 +1,295 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "cblas.h"
+#ifdef __cplusplus
+}
+#endif
+
+/*-----------------------------------------------------------------------------
+* Timing Setup
+*----------------------------------------------------------------------------*/
+struct timespec t0,t1;
+#define tick() clock_gettime(CLOCK_MONOTONIC, &t0);
+#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
+ t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
+
+extern int dgetrf_(int *, int *, double *, int *, int *, int *);
+extern int dgetri_(int *, double *, int *, int *, double *, int *, int *);
+extern int dgeqrf_(int *, int *, double *, int *, double *, double *, int *, int *);
+extern int dtrtri_(char *, char *, int *, double *, int *, int *);
+
+extern int dlatmr_(int *, int *, char *, int *, char *,
+ double *, int *, double *, double *, char *, char *,
+ double *, int *, double *, double *, int *,
+ double *, char *, int *, int *, int *,
+ double *, double *, char *, double *, int *,
+ int *, int *);
+extern int dlacpy_(char *, int *, int *, double *, int *, double *, int *);
+
+/* Auxiliary routines prototypes */
+void print_int_vector( char* desc, int n, int* a );
+void print_real_matrix(char* desc, double *mat, int n, int m, int ldv );
+double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld);
+void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run);
+
+/* Parameters */
+#define MATRIX_ORDER 5
+#define LDA MATRIX_ORDER
+
+/* Main program */
+int print_data;
+int main(int argc, char *argv[]) {
+ /* Locals */
+ float secs, secs_lud_inv, *secs_all_runs;
+ int num_test, num_run, i, j;
+ int n, n_min, n_inc, info, lwork, lda;
+ double lwork_opt, max_abs_error;
+ double *work;
+ double *matrix_A, *matrix_LUD;
+ double *vec_dbr_diagr, *vec_dbr_diag, *vec_dbr_diagl;
+ int *vec_int_pivot, *vec_int_work;
+ int *matrix_sizes;
+
+ int const_int6 = 6;
+ double const_dbr1 = 1.;
+ double const_dbr0 = 0.;
+ int rand_seed[4] = {10,123,278,3579}; /* numbers must lie between 0 and 4095 */
+
+ fprintf(stdout, "\n\n===== LAPACK example: matrix inversion through LUD =====\n");
+ if(argc == 1) { /* no command line arguments, use default */
+ num_test = 3;
+ n_min = 1000;
+ n_inc = 1000;
+ num_run = 1;
+ print_data = 0;
+ }
+ else if (argc < 6) {
+ printf("Usage: ludinv <num_test> <start_size> <size_inc> <num_run per size> <print data or not (1 or 0)>\n");
+ exit(0);
+ }
+ else {
+ num_test = atoi(argv[1]);
+ n_min = atoi(argv[2]);
+ n_inc = atoi(argv[3]);
+ num_run = atoi(argv[4]);
+ print_data = atoi(argv[5]);
+ }
+ rand_seed[0] = 4675;
+
+ secs_all_runs = (float *)malloc(sizeof(float)*num_test*num_run);
+ matrix_sizes = (int*)malloc(sizeof(int)*num_test);
+
+ for(i=0, n=n_min; i<num_test; i++, n+=n_inc)
+ {
+ lda = n;
+ matrix_sizes[i] = n;
+
+ printf("\n\nStart matrix inversion by LUD for matrix size %d. \n", n);
+
+ /* Operation counts of LUD inversion according to LAPACK Working Note 41 */
+ /*operation_counts = 2.*n*n*n - 1.5*n*n + 2.5*n; */
+
+ /* Allocate memory for matrix and vectors */
+ matrix_A = (double *)__malloc_ddr(sizeof(double)*n*n);
+ matrix_LUD = (double *)__malloc_ddr(sizeof(double)*n*n);
+ vec_int_pivot = (int *)__malloc_ddr(sizeof(int)*n);
+ vec_int_work = (int *)__malloc_ddr(sizeof(int)*n);
+ vec_dbr_diag = (double *)__malloc_ddr(sizeof(double)*n);
+ vec_dbr_diagl = (double *)__malloc_ddr(sizeof(double)*n);
+ vec_dbr_diagr = (double *)__malloc_ddr(sizeof(double)*n);
+
+ j = 0;
+ while(j<num_run) {
+ /* Generate a N by N random matrix */
+ printf("\nStart test run %d out of %d. \n", j+1, num_run);
+ printf("Generate %d by %d random matrix. \n", n, n);
+ dlatmr_(&n, &n, "S", rand_seed, "N", vec_dbr_diag, &const_int6, &const_dbr1,
+ &const_dbr1, "T", "N", vec_dbr_diagl, &const_int6, &const_dbr1, vec_dbr_diagr,
+ &const_int6, &const_dbr1, "N", vec_int_pivot, &n, &n, &const_dbr0,
+ &const_dbr1, "N", matrix_A, &lda, vec_int_work, &info);
+ if(info != 0) {
+ printf("Random matrix generation has a problem. Returned error value is %d. Start a new run. \n", info);
+ //goto START_NEW_RUN;
+ exit(0);
+ }
+
+ memcpy((void *)matrix_LUD, (void *)matrix_A, sizeof(double)*n*n);
+
+ print_real_matrix("Generated random matrix (double real): ", matrix_LUD, n, n, lda);
+
+ tick();
+
+ /* Computes LU factorization */
+ dgetrf_(&n, &n, matrix_LUD, &lda, vec_int_pivot, &info);
+
+ secs_lud_inv = tock();
+
+ if(info != 0) {
+ printf("LU factorization has a problem. Returned error value of dgetrf is %d. Start a new run. \n", info);
+ //goto START_NEW_RUN;
+ exit(0);
+ }
+
+ printf("LU decomposition finished. Time spent on LUD: %f seconds.\n", secs_lud_inv);
+
+ /* Print details of LU factorization */
+ print_real_matrix( "Details of LU factorization", matrix_LUD, n, n, lda);
+
+ /* Print pivot indices */
+ print_int_vector( "Pivot indices", n, vec_int_pivot );
+
+ tick();
+
+ /* Query and allocate the optimal workspace */
+ lwork = -1;
+ dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, &lwork_opt, &lwork, &info);
+ lwork = (int)lwork_opt;
+ work = (double *)__malloc_ddr( lwork*sizeof(double) );
+
+ /* Compute inversion of matrix based on LU factorization */
+ dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, work, &lwork, &info);
+
+ secs = tock();
+
+ __free_ddr((void *)work);
+ if(info != 0) {
+ printf("Matrix inversion through LU factorization has a problem.");
+ printf("Returned error value of dgetri is %d. Start a new run. \n", info);
+ //goto START_NEW_RUN;
+ exit(0);
+ }
+
+ secs_lud_inv += secs;
+ secs_all_runs[i*num_run+j] = secs_lud_inv;
+ printf("Matrix inversion finished. Time spent on inversion: %f seconds.\n", secs);
+ printf("Time spent on LUD and inversion: %f seconds.\n", secs_lud_inv);
+
+ /* Print solution */
+ print_real_matrix( "Inverse of matrix A through LU factorization:", matrix_LUD, n, n, lda);
+
+ /* Compute A*inv(A) and precision error: I-A*inv(A) */
+ max_abs_error = compute_inverse_error(matrix_A, matrix_LUD, n, lda);
+ printf("Maximum relative error of matrix inversion by LUD: %e\n", max_abs_error);
+
+ j++;
+
+START_NEW_RUN:
+ printf("Current run finished. \n");
+ } /* while(j<num_run) */
+
+ __free_ddr((void *)matrix_A);
+ __free_ddr((void *)matrix_LUD);
+ __free_ddr((void *)vec_int_pivot);
+ __free_ddr((void *)vec_int_work);
+ __free_ddr((void *)vec_dbr_diag);
+ __free_ddr((void *)vec_dbr_diagl);
+ __free_ddr((void *)vec_dbr_diagr);
+ } /* for(i=0, n=n_min; i<num_test; i++, n+=n_inc) */
+
+ print_performance(matrix_sizes, secs_all_runs, num_test, num_run);
+
+ exit( 0 );
+}
+
+
+/* Auxiliary routine: printing a vector of integers */
+void print_int_vector( char* desc, int n, int* a )
+{
+ int j;
+
+ if (!print_data)
+ return;
+
+ printf( "\n %s\n", desc );
+ for( j = 0; j < n; j++ ) printf( " %6i", a[j] );
+ printf( "\n" );
+}
+
+/* Auxiliary routine: printing a real matrix */
+void print_real_matrix(char* desc, double *mat, int m, int n, int ldv )
+{
+ int i, j;
+
+ if (!print_data)
+ return;
+
+ printf( "\n %s\n", desc );
+ for(i=0; i<m; i++) {
+ for(j=0; j<n; j++) {
+ printf( " %10.5f ", mat[i+j*ldv]);
+ }
+ printf( "\n" );
+ }
+}
+
+
+double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld)
+{
+ int i, j;
+ double abs_err, relative_err;
+ double *matrix_A_mult_A_inv;
+ double max_err = 0.;
+
+ matrix_A_mult_A_inv = (double *)malloc(n*n*sizeof(double));
+
+ cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,n,n,n,1.0,matrix_A,ld,
+ matrix_A_inverse,ld,0.0,matrix_A_mult_A_inv,ld);
+
+ /* sqrt(sum(err^2))/N, sum(abs(err))/N, max(abs(err)) */
+ for(i=0; i<n; i++) {
+ for(j=0; j<n; j++) {
+ if(i==j) {
+ abs_err = abs(matrix_A_mult_A_inv[i+j*ld]-1.);
+ }
+ else {
+ abs_err = abs(matrix_A_mult_A_inv[i+j*ld]);
+ }
+
+ relative_err = abs_err / abs(matrix_A[i+j*ld]);
+
+ if(relative_err > max_err) {
+ max_err = relative_err;
+ }
+ }
+ }
+
+ free(matrix_A_mult_A_inv);
+ return max_err;
+}
+
+
+void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run)
+{
+ int i, j;
+ float inst, min, max;
+ float tot, avg;
+
+ printf("Seconds for all matrix sizes:\n");
+ printf("Matrix size ");
+ printf(" Min Max Average ");
+ printf("\n");
+
+ for(i=0; i<num_test; i++)
+ {
+ printf("%11d ", matrix_sizes[i]);
+ tot = 0.;
+ max = 0.;
+ min = 1e100;
+ for(j=0; j<num_run; j++)
+ {
+ inst = secs_runs[i*num_run+j];
+ tot += (double)inst;
+
+ if(inst>max) max = inst;
+ if(inst<min) min = inst;
+ }
+ avg = tot/(double)num_run;
+ printf("%12f%12f%12f\n", min, max, avg);
+ }
+
+}
diff --git a/examples/make.inc b/examples/make.inc
index 0125a07bc946176900fb795bdf6ca3190ac8e911..ae1ce4087b805a9ab9238e77dd8fc8667df014ae 100644 (file)
--- a/examples/make.inc
+++ b/examples/make.inc
CC = gcc
CFLAGS = -g -O2 -I/usr/include
-BLAS_LIB_DIR = /usr/lib/
-BLASLIB = $(BLAS_LIB_DIR)libcblas_armplusdsp.a $(BLAS_LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp
-
+LIB_DIR = /usr/lib/
+BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp
+LAPACKLIB = $(LIB_DIR)libcblaswr.a $(LIB_DIR)liblapack.a $(LIB_DIR)libf2c.a $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp
%.o: %.c
@$(CC) -c $(CFLAGS) $<