From 32068b87be05d0d7d022a18efae4927534e26961 Mon Sep 17 00:00:00 2001 From: Jianzhong Xu Date: Fri, 13 Mar 2015 16:19:33 -0400 Subject: [PATCH] Added eigen decomposition example. --- build/tar_files_list.txt | 1 + build/version.txt | 2 +- debian/ti-linalg.install | 4 - examples/eig/Makefile | 22 + examples/eig/dlaran.c | 123 ++++ examples/eig/dlarnd.c | 108 ++++ examples/eig/dlatm1.c | 283 +++++++++ examples/eig/dlatm2.c | 251 ++++++++ examples/eig/dlatm3.c | 261 ++++++++ examples/eig/dlatmr.c | 1288 ++++++++++++++++++++++++++++++++++++++ examples/eig/main.c | 459 ++++++++++++++ examples/ludinv/Makefile | 7 +- examples/ludinv/main.c | 32 +- examples/matmpy/Makefile | 6 +- examples/matmpy/main.c | 27 +- 15 files changed, 2835 insertions(+), 39 deletions(-) delete mode 100644 debian/ti-linalg.install create mode 100644 examples/eig/Makefile create mode 100644 examples/eig/dlaran.c create mode 100644 examples/eig/dlarnd.c create mode 100644 examples/eig/dlatm1.c create mode 100644 examples/eig/dlatm2.c create mode 100644 examples/eig/dlatm3.c create mode 100644 examples/eig/dlatmr.c create mode 100644 examples/eig/main.c diff --git a/build/tar_files_list.txt b/build/tar_files_list.txt index 4eebae3..4dacec4 100644 --- a/build/tar_files_list.txt +++ b/build/tar_files_list.txt @@ -3,6 +3,7 @@ Makefile debian examples/make.inc examples/Makefile +examples/eig examples/ludinv examples/matmpy blis/version diff --git a/build/version.txt b/build/version.txt index b456a71..482e997 100644 --- a/build/version.txt +++ b/build/version.txt @@ -1 +1 @@ -0.0.2.0 +0.1.0.0 diff --git a/debian/ti-linalg.install b/debian/ti-linalg.install deleted file mode 100644 index 1196c25..0000000 --- a/debian/ti-linalg.install +++ /dev/null @@ -1,4 +0,0 @@ -usr/lib/xyz.a -usr/bin -usr/include -usr/share diff --git a/examples/eig/Makefile b/examples/eig/Makefile new file mode 100644 index 0000000..7570ce6 --- /dev/null +++ b/examples/eig/Makefile @@ -0,0 +1,22 @@ +EXE = eig + +include ../make.inc + +$(EXE): main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o + $(CC) $(CFLAGS) main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o $(LAPACKLIB) -o $@ + +ARMtest: $(EXE) + @echo "Forcing BLAS level 3 execution on ARM."; \ + export TI_CBLAS_OFFLOAD=000; \ + ./$(EXE); + +DSPtest: $(EXE) + @echo "Forcing BLAS level 3 execution on DSP."; \ + export TI_CBLAS_OFFLOAD=001; \ + ./$(EXE); + +OPTtest: $(EXE) + @echo "Optimal BLAS level 3 execution on ARM or DSP."; \ + export TI_CBLAS_OFFLOAD=002; \ + ./$(EXE); + diff --git a/examples/eig/dlaran.c b/examples/eig/dlaran.c new file mode 100644 index 0000000..fd0969d --- /dev/null +++ b/examples/eig/dlaran.c @@ -0,0 +1,123 @@ +/* dlaran.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal dlaran_(integer *iseed) +{ + /* System generated locals */ + doublereal ret_val; + + /* Local variables */ + integer it1, it2, it3, it4; + doublereal rndout; + + +/* -- LAPACK auxiliary routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARAN returns a random real number from a uniform (0,1) */ +/* distribution. */ + +/* Arguments */ +/* ========= */ + +/* ISEED (input/output) INTEGER array, dimension (4) */ +/* On entry, the seed of the random number generator; the array */ +/* elements must be between 0 and 4095, and ISEED(4) must be */ +/* odd. */ +/* On exit, the seed is updated. */ + +/* Further Details */ +/* =============== */ + +/* This routine uses a multiplicative congruential method with modulus */ +/* 2**48 and multiplier 33952834046453 (see G.S.Fishman, */ +/* 'Multiplicative congruential random number generators with modulus */ +/* 2**b: an exhaustive analysis for b = 32 and a partial analysis for */ +/* b = 48', Math. Comp. 189, pp 331-344, 1990). */ + +/* 48-bit integers are stored in 4 integer array elements with 12 bits */ +/* per element. Hence the routine is portable across machines with */ +/* integers of 32 bits or more. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + /* Parameter adjustments */ + --iseed; + + /* Function Body */ +L10: + +/* multiply the seed by the multiplier modulo 2**48 */ + + it4 = iseed[4] * 2549; + it3 = it4 / 4096; + it4 -= it3 << 12; + it3 = it3 + iseed[3] * 2549 + iseed[4] * 2508; + it2 = it3 / 4096; + it3 -= it2 << 12; + it2 = it2 + iseed[2] * 2549 + iseed[3] * 2508 + iseed[4] * 322; + it1 = it2 / 4096; + it2 -= it1 << 12; + it1 = it1 + iseed[1] * 2549 + iseed[2] * 2508 + iseed[3] * 322 + iseed[4] + * 494; + it1 %= 4096; + +/* return updated seed */ + + iseed[1] = it1; + iseed[2] = it2; + iseed[3] = it3; + iseed[4] = it4; + +/* convert 48-bit integer to a real number in the interval (0,1) */ + + rndout = ((doublereal) it1 + ((doublereal) it2 + ((doublereal) it3 + ( + doublereal) it4 * 2.44140625e-4) * 2.44140625e-4) * 2.44140625e-4) + * 2.44140625e-4; + + if (rndout == 1.) { +/* If a real number has n bits of precision, and the first */ +/* n bits of the 48-bit integer above happen to be all 1 (which */ +/* will occur about once every 2**n calls), then DLARAN will */ +/* be rounded to exactly 1.0. */ +/* Since DLARAN is not supposed to return exactly 0.0 or 1.0 */ +/* (and some callers of DLARAN, such as CLARND, depend on that), */ +/* the statistically correct thing to do in this situation is */ +/* simply to iterate again. */ +/* N.B. the case DLARAN = 0.0 should not be possible. */ + + goto L10; + } + + ret_val = rndout; + return ret_val; + +/* End of DLARAN */ + +} /* dlaran_ */ diff --git a/examples/eig/dlarnd.c b/examples/eig/dlarnd.c new file mode 100644 index 0000000..df629ab --- /dev/null +++ b/examples/eig/dlarnd.c @@ -0,0 +1,108 @@ +/* dlarnd.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal dlarnd_(integer *idist, integer *iseed) +{ + /* System generated locals */ + doublereal ret_val; + + /* Builtin functions */ + double log(doublereal), sqrt(doublereal), cos(doublereal); + + /* Local variables */ + doublereal t1, t2; + extern doublereal dlaran_(integer *); + + +/* -- LAPACK auxiliary routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLARND returns a random real number from a uniform or normal */ +/* distribution. */ + +/* Arguments */ +/* ========= */ + +/* IDIST (input) INTEGER */ +/* Specifies the distribution of the random numbers: */ +/* = 1: uniform (0,1) */ +/* = 2: uniform (-1,1) */ +/* = 3: normal (0,1) */ + +/* ISEED (input/output) INTEGER array, dimension (4) */ +/* On entry, the seed of the random number generator; the array */ +/* elements must be between 0 and 4095, and ISEED(4) must be */ +/* odd. */ +/* On exit, the seed is updated. */ + +/* Further Details */ +/* =============== */ + +/* This routine calls the auxiliary routine DLARAN to generate a random */ +/* real number from a uniform (0,1) distribution. The Box-Muller method */ +/* is used to transform numbers from a uniform to a normal distribution. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Generate a real random number from a uniform (0,1) distribution */ + + /* Parameter adjustments */ + --iseed; + + /* Function Body */ + t1 = dlaran_(&iseed[1]); + + if (*idist == 1) { + +/* uniform (0,1) */ + + ret_val = t1; + } else if (*idist == 2) { + +/* uniform (-1,1) */ + + ret_val = t1 * 2. - 1.; + } else if (*idist == 3) { + +/* normal (0,1) */ + + t2 = dlaran_(&iseed[1]); + ret_val = sqrt(log(t1) * -2.) * cos(t2 * + 6.2831853071795864769252867663); + } + return ret_val; + +/* End of DLARND */ + +} /* dlarnd_ */ diff --git a/examples/eig/dlatm1.c b/examples/eig/dlatm1.c new file mode 100644 index 0000000..f36f314 --- /dev/null +++ b/examples/eig/dlatm1.c @@ -0,0 +1,283 @@ +/* dlatm1.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Subroutine */ int dlatm1_(integer *mode, doublereal *cond, integer *irsign, + integer *idist, integer *iseed, doublereal *d__, integer *n, integer + *info) +{ + /* System generated locals */ + integer i__1, i__2; + doublereal d__1; + + /* Builtin functions */ + double pow_dd(doublereal *, doublereal *), pow_di(doublereal *, integer *) + , log(doublereal), exp(doublereal); + + /* Local variables */ + integer i__; + doublereal temp, alpha; + extern doublereal dlaran_(integer *); + extern /* Subroutine */ int xerbla_(char *, integer *), dlarnv_( + integer *, integer *, integer *, doublereal *); + + +/* -- LAPACK auxiliary test routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATM1 computes the entries of D(1..N) as specified by */ +/* MODE, COND and IRSIGN. IDIST and ISEED determine the generation */ +/* of random numbers. DLATM1 is called by SLATMR to generate */ +/* random test matrices for LAPACK programs. */ + +/* Arguments */ +/* ========= */ + +/* MODE - INTEGER */ +/* On entry describes how D is to be computed: */ +/* MODE = 0 means do not change D. */ +/* MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND */ +/* MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND */ +/* MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) */ +/* MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) */ +/* MODE = 5 sets D to random numbers in the range */ +/* ( 1/COND , 1 ) such that their logarithms */ +/* are uniformly distributed. */ +/* MODE = 6 set D to random numbers from same distribution */ +/* as the rest of the matrix. */ +/* MODE < 0 has the same meaning as ABS(MODE), except that */ +/* the order of the elements of D is reversed. */ +/* Thus if MODE is positive, D has entries ranging from */ +/* 1 to 1/COND, if negative, from 1/COND to 1, */ +/* Not modified. */ + +/* COND - DOUBLE PRECISION */ +/* On entry, used as described under MODE above. */ +/* If used, it must be >= 1. Not modified. */ + +/* IRSIGN - INTEGER */ +/* On entry, if MODE neither -6, 0 nor 6, determines sign of */ +/* entries of D */ +/* 0 => leave entries of D unchanged */ +/* 1 => multiply each entry of D by 1 or -1 with probability .5 */ + +/* IDIST - CHARACTER*1 */ +/* On entry, IDIST specifies the type of distribution to be */ +/* used to generate a random matrix . */ +/* 1 => UNIFORM( 0, 1 ) */ +/* 2 => UNIFORM( -1, 1 ) */ +/* 3 => NORMAL( 0, 1 ) */ +/* Not modified. */ + +/* ISEED - INTEGER array, dimension ( 4 ) */ +/* On entry ISEED specifies the seed of the random number */ +/* generator. The random number generator uses a */ +/* linear congruential sequence limited to small */ +/* integers, and so should produce machine independent */ +/* random numbers. The values of ISEED are changed on */ +/* exit, and can be used in the next call to DLATM1 */ +/* to continue the same random number sequence. */ +/* Changed on exit. */ + +/* D - DOUBLE PRECISION array, dimension ( MIN( M , N ) ) */ +/* Array to be computed according to MODE, COND and IRSIGN. */ +/* May be changed on exit if MODE is nonzero. */ + +/* N - INTEGER */ +/* Number of entries of D. Not modified. */ + +/* INFO - INTEGER */ +/* 0 => normal termination */ +/* -1 => if MODE not in range -6 to 6 */ +/* -2 => if MODE neither -6, 0 nor 6, and */ +/* IRSIGN neither 0 nor 1 */ +/* -3 => if MODE neither -6, 0 nor 6 and COND less than 1 */ +/* -4 => if MODE equals 6 or -6 and IDIST not in range 1 to 3 */ +/* -7 => if N negative */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* Decode and Test the input parameters. Initialize flags & seed. */ + + /* Parameter adjustments */ + --d__; + --iseed; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Set INFO if an error */ + + if (*mode < -6 || *mode > 6) { + *info = -1; + } else if (*mode != -6 && *mode != 0 && *mode != 6 && (*irsign != 0 && * + irsign != 1)) { + *info = -2; + } else if (*mode != -6 && *mode != 0 && *mode != 6 && *cond < 1.) { + *info = -3; + } else if ((*mode == 6 || *mode == -6) && (*idist < 1 || *idist > 3)) { + *info = -4; + } else if (*n < 0) { + *info = -7; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLATM1", &i__1); + return 0; + } + +/* Compute D according to COND and MODE */ + + if (*mode != 0) { + switch (abs(*mode)) { + case 1: goto L10; + case 2: goto L30; + case 3: goto L50; + case 4: goto L70; + case 5: goto L90; + case 6: goto L110; + } + +/* One large D value: */ + +L10: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = 1. / *cond; +/* L20: */ + } + d__[1] = 1.; + goto L120; + +/* One small D value: */ + +L30: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = 1.; +/* L40: */ + } + d__[*n] = 1. / *cond; + goto L120; + +/* Exponentially distributed D values: */ + +L50: + d__[1] = 1.; + if (*n > 1) { + d__1 = -1. / (doublereal) (*n - 1); + alpha = pow_dd(cond, &d__1); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + i__2 = i__ - 1; + d__[i__] = pow_di(&alpha, &i__2); +/* L60: */ + } + } + goto L120; + +/* Arithmetically distributed D values: */ + +L70: + d__[1] = 1.; + if (*n > 1) { + temp = 1. / *cond; + alpha = (1. - temp) / (doublereal) (*n - 1); + i__1 = *n; + for (i__ = 2; i__ <= i__1; ++i__) { + d__[i__] = (doublereal) (*n - i__) * alpha + temp; +/* L80: */ + } + } + goto L120; + +/* Randomly distributed D values on ( 1/COND , 1): */ + +L90: + alpha = log(1. / *cond); + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = exp(alpha * dlaran_(&iseed[1])); +/* L100: */ + } + goto L120; + +/* Randomly distributed D values from IDIST */ + +L110: + dlarnv_(idist, &iseed[1], n, &d__[1]); + +L120: + +/* If MODE neither -6 nor 0 nor 6, and IRSIGN = 1, assign */ +/* random signs to D */ + + if (*mode != -6 && *mode != 0 && *mode != 6 && *irsign == 1) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = dlaran_(&iseed[1]); + if (temp > .5) { + d__[i__] = -d__[i__]; + } +/* L130: */ + } + } + +/* Reverse if MODE < 0 */ + + if (*mode < 0) { + i__1 = *n / 2; + for (i__ = 1; i__ <= i__1; ++i__) { + temp = d__[i__]; + d__[i__] = d__[*n + 1 - i__]; + d__[*n + 1 - i__] = temp; +/* L140: */ + } + } + + } + + return 0; + +/* End of DLATM1 */ + +} /* dlatm1_ */ diff --git a/examples/eig/dlatm2.c b/examples/eig/dlatm2.c new file mode 100644 index 0000000..502a529 --- /dev/null +++ b/examples/eig/dlatm2.c @@ -0,0 +1,251 @@ +/* dlatm2.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal dlatm2_(integer *m, integer *n, integer *i__, integer *j, integer * + kl, integer *ku, integer *idist, integer *iseed, doublereal *d__, + integer *igrade, doublereal *dl, doublereal *dr, integer *ipvtng, + integer *iwork, doublereal *sparse) +{ + /* System generated locals */ + doublereal ret_val; + + /* Local variables */ + integer isub, jsub; + doublereal temp; + extern doublereal dlaran_(integer *), dlarnd_(integer *, integer *); + + +/* -- LAPACK auxiliary test routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ + +/* .. */ + +/* .. Array Arguments .. */ + +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATM2 returns the (I,J) entry of a random matrix of dimension */ +/* (M, N) described by the other paramters. It is called by the */ +/* DLATMR routine in order to build random test matrices. No error */ +/* checking on parameters is done, because this routine is called in */ +/* a tight loop by DLATMR which has already checked the parameters. */ + +/* Use of DLATM2 differs from SLATM3 in the order in which the random */ +/* number generator is called to fill in random matrix entries. */ +/* With DLATM2, the generator is called to fill in the pivoted matrix */ +/* columnwise. With DLATM3, the generator is called to fill in the */ +/* matrix columnwise, after which it is pivoted. Thus, DLATM3 can */ +/* be used to construct random matrices which differ only in their */ +/* order of rows and/or columns. DLATM2 is used to construct band */ +/* matrices while avoiding calling the random number generator for */ +/* entries outside the band (and therefore generating random numbers */ + +/* The matrix whose (I,J) entry is returned is constructed as */ +/* follows (this routine only computes one entry): */ + +/* If I is outside (1..M) or J is outside (1..N), return zero */ +/* (this is convenient for generating matrices in band format). */ + +/* Generate a matrix A with random entries of distribution IDIST. */ + +/* Set the diagonal to D. */ + +/* Grade the matrix, if desired, from the left (by DL) and/or */ +/* from the right (by DR or DL) as specified by IGRADE. */ + +/* Permute, if desired, the rows and/or columns as specified by */ +/* IPVTNG and IWORK. */ + +/* Band the matrix to have lower bandwidth KL and upper */ +/* bandwidth KU. */ + +/* Set random entries to zero as specified by SPARSE. */ + +/* Arguments */ +/* ========= */ + +/* M - INTEGER */ +/* Number of rows of matrix. Not modified. */ + +/* N - INTEGER */ +/* Number of columns of matrix. Not modified. */ + +/* I - INTEGER */ +/* Row of entry to be returned. Not modified. */ + +/* J - INTEGER */ +/* Column of entry to be returned. Not modified. */ + +/* KL - INTEGER */ +/* Lower bandwidth. Not modified. */ + +/* KU - INTEGER */ +/* Upper bandwidth. Not modified. */ + +/* IDIST - INTEGER */ +/* On entry, IDIST specifies the type of distribution to be */ +/* used to generate a random matrix . */ +/* 1 => UNIFORM( 0, 1 ) */ +/* 2 => UNIFORM( -1, 1 ) */ +/* 3 => NORMAL( 0, 1 ) */ +/* Not modified. */ + +/* ISEED - INTEGER array of dimension ( 4 ) */ +/* Seed for random number generator. */ +/* Changed on exit. */ + +/* D - DOUBLE PRECISION array of dimension ( MIN( I , J ) ) */ +/* Diagonal entries of matrix. Not modified. */ + +/* IGRADE - INTEGER */ +/* Specifies grading of matrix as follows: */ +/* 0 => no grading */ +/* 1 => matrix premultiplied by diag( DL ) */ +/* 2 => matrix postmultiplied by diag( DR ) */ +/* 3 => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by diag( DR ) */ +/* 4 => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by inv( diag( DL ) ) */ +/* 5 => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by diag( DL ) */ +/* Not modified. */ + +/* DL - DOUBLE PRECISION array ( I or J, as appropriate ) */ +/* Left scale factors for grading matrix. Not modified. */ + +/* DR - DOUBLE PRECISION array ( I or J, as appropriate ) */ +/* Right scale factors for grading matrix. Not modified. */ + +/* IPVTNG - INTEGER */ +/* On entry specifies pivoting permutations as follows: */ +/* 0 => none. */ +/* 1 => row pivoting. */ +/* 2 => column pivoting. */ +/* 3 => full pivoting, i.e., on both sides. */ +/* Not modified. */ + +/* IWORK - INTEGER array ( I or J, as appropriate ) */ +/* This array specifies the permutation used. The */ +/* row (or column) in position K was originally in */ +/* position IWORK( K ). */ +/* This differs from IWORK for DLATM3. Not modified. */ + +/* SPARSE - DOUBLE PRECISION between 0. and 1. */ +/* On entry specifies the sparsity of the matrix */ +/* if sparse matix is to be generated. */ +/* SPARSE should lie between 0 and 1. */ +/* A uniform ( 0, 1 ) random number x is generated and */ +/* compared to SPARSE; if x is larger the matrix entry */ +/* is unchanged and if x is smaller the entry is set */ +/* to zero. Thus on the average a fraction SPARSE of the */ +/* entries will be set to zero. */ +/* Not modified. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ + +/* .. */ + +/* .. Local Scalars .. */ + +/* .. */ + +/* .. External Functions .. */ + +/* .. */ + +/* ----------------------------------------------------------------------- */ + +/* .. Executable Statements .. */ + + +/* Check for I and J in range */ + + /* Parameter adjustments */ + --iwork; + --dr; + --dl; + --d__; + --iseed; + + /* Function Body */ + if (*i__ < 1 || *i__ > *m || *j < 1 || *j > *n) { + ret_val = 0.; + return ret_val; + } + +/* Check for banding */ + + if (*j > *i__ + *ku || *j < *i__ - *kl) { + ret_val = 0.; + return ret_val; + } + +/* Check for sparsity */ + + if (*sparse > 0.) { + if (dlaran_(&iseed[1]) < *sparse) { + ret_val = 0.; + return ret_val; + } + } + +/* Compute subscripts depending on IPVTNG */ + + if (*ipvtng == 0) { + isub = *i__; + jsub = *j; + } else if (*ipvtng == 1) { + isub = iwork[*i__]; + jsub = *j; + } else if (*ipvtng == 2) { + isub = *i__; + jsub = iwork[*j]; + } else if (*ipvtng == 3) { + isub = iwork[*i__]; + jsub = iwork[*j]; + } + +/* Compute entry and grade it according to IGRADE */ + + if (isub == jsub) { + temp = d__[isub]; + } else { + temp = dlarnd_(idist, &iseed[1]); + } + if (*igrade == 1) { + temp *= dl[isub]; + } else if (*igrade == 2) { + temp *= dr[jsub]; + } else if (*igrade == 3) { + temp = temp * dl[isub] * dr[jsub]; + } else if (*igrade == 4 && isub != jsub) { + temp = temp * dl[isub] / dl[jsub]; + } else if (*igrade == 5) { + temp = temp * dl[isub] * dl[jsub]; + } + ret_val = temp; + return ret_val; + +/* End of DLATM2 */ + +} /* dlatm2_ */ diff --git a/examples/eig/dlatm3.c b/examples/eig/dlatm3.c new file mode 100644 index 0000000..2ddef32 --- /dev/null +++ b/examples/eig/dlatm3.c @@ -0,0 +1,261 @@ +/* dlatm3.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +doublereal dlatm3_(integer *m, integer *n, integer *i__, integer *j, integer * + isub, integer *jsub, integer *kl, integer *ku, integer *idist, + integer *iseed, doublereal *d__, integer *igrade, doublereal *dl, + doublereal *dr, integer *ipvtng, integer *iwork, doublereal *sparse) +{ + /* System generated locals */ + doublereal ret_val; + + /* Local variables */ + doublereal temp; + extern doublereal dlaran_(integer *), dlarnd_(integer *, integer *); + + +/* -- LAPACK auxiliary test routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ + +/* .. */ + +/* .. Array Arguments .. */ + +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATM3 returns the (ISUB,JSUB) entry of a random matrix of */ +/* dimension (M, N) described by the other paramters. (ISUB,JSUB) */ +/* is the final position of the (I,J) entry after pivoting */ +/* according to IPVTNG and IWORK. DLATM3 is called by the */ +/* DLATMR routine in order to build random test matrices. No error */ +/* checking on parameters is done, because this routine is called in */ +/* a tight loop by DLATMR which has already checked the parameters. */ + +/* Use of DLATM3 differs from SLATM2 in the order in which the random */ +/* number generator is called to fill in random matrix entries. */ +/* With DLATM2, the generator is called to fill in the pivoted matrix */ +/* columnwise. With DLATM3, the generator is called to fill in the */ +/* matrix columnwise, after which it is pivoted. Thus, DLATM3 can */ +/* be used to construct random matrices which differ only in their */ +/* order of rows and/or columns. DLATM2 is used to construct band */ +/* matrices while avoiding calling the random number generator for */ +/* entries outside the band (and therefore generating random numbers */ +/* in different orders for different pivot orders). */ + +/* The matrix whose (ISUB,JSUB) entry is returned is constructed as */ +/* follows (this routine only computes one entry): */ + +/* If ISUB is outside (1..M) or JSUB is outside (1..N), return zero */ +/* (this is convenient for generating matrices in band format). */ + +/* Generate a matrix A with random entries of distribution IDIST. */ + +/* Set the diagonal to D. */ + +/* Grade the matrix, if desired, from the left (by DL) and/or */ +/* from the right (by DR or DL) as specified by IGRADE. */ + +/* Permute, if desired, the rows and/or columns as specified by */ +/* IPVTNG and IWORK. */ + +/* Band the matrix to have lower bandwidth KL and upper */ +/* bandwidth KU. */ + +/* Set random entries to zero as specified by SPARSE. */ + +/* Arguments */ +/* ========= */ + +/* M - INTEGER */ +/* Number of rows of matrix. Not modified. */ + +/* N - INTEGER */ +/* Number of columns of matrix. Not modified. */ + +/* I - INTEGER */ +/* Row of unpivoted entry to be returned. Not modified. */ + +/* J - INTEGER */ +/* Column of unpivoted entry to be returned. Not modified. */ + +/* ISUB - INTEGER */ +/* Row of pivoted entry to be returned. Changed on exit. */ + +/* JSUB - INTEGER */ +/* Column of pivoted entry to be returned. Changed on exit. */ + +/* KL - INTEGER */ +/* Lower bandwidth. Not modified. */ + +/* KU - INTEGER */ +/* Upper bandwidth. Not modified. */ + +/* IDIST - INTEGER */ +/* On entry, IDIST specifies the type of distribution to be */ +/* used to generate a random matrix . */ +/* 1 => UNIFORM( 0, 1 ) */ +/* 2 => UNIFORM( -1, 1 ) */ +/* 3 => NORMAL( 0, 1 ) */ +/* Not modified. */ + +/* ISEED - INTEGER array of dimension ( 4 ) */ +/* Seed for random number generator. */ +/* Changed on exit. */ + +/* D - DOUBLE PRECISION array of dimension ( MIN( I , J ) ) */ +/* Diagonal entries of matrix. Not modified. */ + +/* IGRADE - INTEGER */ +/* Specifies grading of matrix as follows: */ +/* 0 => no grading */ +/* 1 => matrix premultiplied by diag( DL ) */ +/* 2 => matrix postmultiplied by diag( DR ) */ +/* 3 => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by diag( DR ) */ +/* 4 => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by inv( diag( DL ) ) */ +/* 5 => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by diag( DL ) */ +/* Not modified. */ + +/* DL - DOUBLE PRECISION array ( I or J, as appropriate ) */ +/* Left scale factors for grading matrix. Not modified. */ + +/* DR - DOUBLE PRECISION array ( I or J, as appropriate ) */ +/* Right scale factors for grading matrix. Not modified. */ + +/* IPVTNG - INTEGER */ +/* On entry specifies pivoting permutations as follows: */ +/* 0 => none. */ +/* 1 => row pivoting. */ +/* 2 => column pivoting. */ +/* 3 => full pivoting, i.e., on both sides. */ +/* Not modified. */ + +/* IWORK - INTEGER array ( I or J, as appropriate ) */ +/* This array specifies the permutation used. The */ +/* row (or column) originally in position K is in */ +/* position IWORK( K ) after pivoting. */ +/* This differs from IWORK for DLATM2. Not modified. */ + +/* SPARSE - DOUBLE PRECISION between 0. and 1. */ +/* On entry specifies the sparsity of the matrix */ +/* if sparse matix is to be generated. */ +/* SPARSE should lie between 0 and 1. */ +/* A uniform ( 0, 1 ) random number x is generated and */ +/* compared to SPARSE; if x is larger the matrix entry */ +/* is unchanged and if x is smaller the entry is set */ +/* to zero. Thus on the average a fraction SPARSE of the */ +/* entries will be set to zero. */ +/* Not modified. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ + +/* .. */ + +/* .. Local Scalars .. */ + +/* .. */ + +/* .. External Functions .. */ + +/* .. */ + +/* ----------------------------------------------------------------------- */ + +/* .. Executable Statements .. */ + + +/* Check for I and J in range */ + + /* Parameter adjustments */ + --iwork; + --dr; + --dl; + --d__; + --iseed; + + /* Function Body */ + if (*i__ < 1 || *i__ > *m || *j < 1 || *j > *n) { + *isub = *i__; + *jsub = *j; + ret_val = 0.; + return ret_val; + } + +/* Compute subscripts depending on IPVTNG */ + + if (*ipvtng == 0) { + *isub = *i__; + *jsub = *j; + } else if (*ipvtng == 1) { + *isub = iwork[*i__]; + *jsub = *j; + } else if (*ipvtng == 2) { + *isub = *i__; + *jsub = iwork[*j]; + } else if (*ipvtng == 3) { + *isub = iwork[*i__]; + *jsub = iwork[*j]; + } + +/* Check for banding */ + + if (*jsub > *isub + *ku || *jsub < *isub - *kl) { + ret_val = 0.; + return ret_val; + } + +/* Check for sparsity */ + + if (*sparse > 0.) { + if (dlaran_(&iseed[1]) < *sparse) { + ret_val = 0.; + return ret_val; + } + } + +/* Compute entry and grade it according to IGRADE */ + + if (*i__ == *j) { + temp = d__[*i__]; + } else { + temp = dlarnd_(idist, &iseed[1]); + } + if (*igrade == 1) { + temp *= dl[*i__]; + } else if (*igrade == 2) { + temp *= dr[*j]; + } else if (*igrade == 3) { + temp = temp * dl[*i__] * dr[*j]; + } else if (*igrade == 4 && *i__ != *j) { + temp = temp * dl[*i__] / dl[*j]; + } else if (*igrade == 5) { + temp = temp * dl[*i__] * dl[*j]; + } + ret_val = temp; + return ret_val; + +/* End of DLATM3 */ + +} /* dlatm3_ */ diff --git a/examples/eig/dlatmr.c b/examples/eig/dlatmr.c new file mode 100644 index 0000000..0b74688 --- /dev/null +++ b/examples/eig/dlatmr.c @@ -0,0 +1,1288 @@ +/* dlatmr.f -- translated by f2c (version 20061008). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "f2c.h" +#include "blaswrap.h" + +/* Table of constant values */ + +static integer c__0 = 0; +static integer c__1 = 1; + +/* Subroutine */ int dlatmr_(integer *m, integer *n, char *dist, integer * + iseed, char *sym, doublereal *d__, integer *mode, doublereal *cond, + doublereal *dmax__, char *rsign, char *grade, doublereal *dl, integer + *model, doublereal *condl, doublereal *dr, integer *moder, doublereal + *condr, char *pivtng, integer *ipivot, integer *kl, integer *ku, + doublereal *sparse, doublereal *anorm, char *pack, doublereal *a, + integer *lda, integer *iwork, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1, d__2, d__3; + + /* Local variables */ + integer i__, j, k, kll, kuu, isub, jsub; + doublereal temp; + integer isym; + doublereal alpha; + extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, + integer *); + integer ipack; + extern logical lsame_(char *, char *); + doublereal tempa[1]; + integer iisub, idist, jjsub, mnmin; + logical dzero; + integer mnsub; + doublereal onorm; + integer mxsub, npvts; + extern /* Subroutine */ int dlatm1_(integer *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, integer *); + extern doublereal dlatm2_(integer *, integer *, integer *, integer *, + integer *, integer *, integer *, integer *, doublereal *, integer + *, doublereal *, doublereal *, integer *, integer *, doublereal *) + , dlatm3_(integer *, integer *, integer *, integer *, integer *, + integer *, integer *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *), dlangb_(char *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *), dlange_(char *, + integer *, integer *, doublereal *, integer *, doublereal *); + integer igrade; + extern doublereal dlansb_(char *, char *, integer *, integer *, + doublereal *, integer *, doublereal *); + logical fulbnd; + extern /* Subroutine */ int xerbla_(char *, integer *); + logical badpvt; + extern doublereal dlansp_(char *, char *, integer *, doublereal *, + doublereal *), dlansy_(char *, char *, integer *, + doublereal *, integer *, doublereal *); + integer irsign, ipvtng; + + +/* -- LAPACK test routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DLATMR generates random matrices of various types for testing */ +/* LAPACK programs. */ + +/* DLATMR operates by applying the following sequence of */ +/* operations: */ + +/* Generate a matrix A with random entries of distribution DIST */ +/* which is symmetric if SYM='S', and nonsymmetric */ +/* if SYM='N'. */ + +/* Set the diagonal to D, where D may be input or */ +/* computed according to MODE, COND, DMAX and RSIGN */ +/* as described below. */ + +/* Grade the matrix, if desired, from the left and/or right */ +/* as specified by GRADE. The inputs DL, MODEL, CONDL, DR, */ +/* MODER and CONDR also determine the grading as described */ +/* below. */ + +/* Permute, if desired, the rows and/or columns as specified by */ +/* PIVTNG and IPIVOT. */ + +/* Set random entries to zero, if desired, to get a random sparse */ +/* matrix as specified by SPARSE. */ + +/* Make A a band matrix, if desired, by zeroing out the matrix */ +/* outside a band of lower bandwidth KL and upper bandwidth KU. */ + +/* Scale A, if desired, to have maximum entry ANORM. */ + +/* Pack the matrix if desired. Options specified by PACK are: */ +/* no packing */ +/* zero out upper half (if symmetric) */ +/* zero out lower half (if symmetric) */ +/* store the upper half columnwise (if symmetric or */ +/* square upper triangular) */ +/* store the lower half columnwise (if symmetric or */ +/* square lower triangular) */ +/* same as upper half rowwise if symmetric */ +/* store the lower triangle in banded format (if symmetric) */ +/* store the upper triangle in banded format (if symmetric) */ +/* store the entire matrix in banded format */ + +/* Note: If two calls to DLATMR differ only in the PACK parameter, */ +/* they will generate mathematically equivalent matrices. */ + +/* If two calls to DLATMR both have full bandwidth (KL = M-1 */ +/* and KU = N-1), and differ only in the PIVTNG and PACK */ +/* parameters, then the matrices generated will differ only */ +/* in the order of the rows and/or columns, and otherwise */ +/* contain the same data. This consistency cannot be and */ +/* is not maintained with less than full bandwidth. */ + +/* Arguments */ +/* ========= */ + +/* M - INTEGER */ +/* Number of rows of A. Not modified. */ + +/* N - INTEGER */ +/* Number of columns of A. Not modified. */ + +/* DIST - CHARACTER*1 */ +/* On entry, DIST specifies the type of distribution to be used */ +/* to generate a random matrix . */ +/* 'U' => UNIFORM( 0, 1 ) ( 'U' for uniform ) */ +/* 'S' => UNIFORM( -1, 1 ) ( 'S' for symmetric ) */ +/* 'N' => NORMAL( 0, 1 ) ( 'N' for normal ) */ +/* Not modified. */ + +/* ISEED - INTEGER array, dimension (4) */ +/* On entry ISEED specifies the seed of the random number */ +/* generator. They should lie between 0 and 4095 inclusive, */ +/* and ISEED(4) should be odd. The random number generator */ +/* uses a linear congruential sequence limited to small */ +/* integers, and so should produce machine independent */ +/* random numbers. The values of ISEED are changed on */ +/* exit, and can be used in the next call to DLATMR */ +/* to continue the same random number sequence. */ +/* Changed on exit. */ + +/* SYM - CHARACTER*1 */ +/* If SYM='S' or 'H', generated matrix is symmetric. */ +/* If SYM='N', generated matrix is nonsymmetric. */ +/* Not modified. */ + +/* D - DOUBLE PRECISION array, dimension (min(M,N)) */ +/* On entry this array specifies the diagonal entries */ +/* of the diagonal of A. D may either be specified */ +/* on entry, or set according to MODE and COND as described */ +/* below. May be changed on exit if MODE is nonzero. */ + +/* MODE - INTEGER */ +/* On entry describes how D is to be used: */ +/* MODE = 0 means use D as input */ +/* MODE = 1 sets D(1)=1 and D(2:N)=1.0/COND */ +/* MODE = 2 sets D(1:N-1)=1 and D(N)=1.0/COND */ +/* MODE = 3 sets D(I)=COND**(-(I-1)/(N-1)) */ +/* MODE = 4 sets D(i)=1 - (i-1)/(N-1)*(1 - 1/COND) */ +/* MODE = 5 sets D to random numbers in the range */ +/* ( 1/COND , 1 ) such that their logarithms */ +/* are uniformly distributed. */ +/* MODE = 6 set D to random numbers from same distribution */ +/* as the rest of the matrix. */ +/* MODE < 0 has the same meaning as ABS(MODE), except that */ +/* the order of the elements of D is reversed. */ +/* Thus if MODE is positive, D has entries ranging from */ +/* 1 to 1/COND, if negative, from 1/COND to 1, */ +/* Not modified. */ + +/* COND - DOUBLE PRECISION */ +/* On entry, used as described under MODE above. */ +/* If used, it must be >= 1. Not modified. */ + +/* DMAX - DOUBLE PRECISION */ +/* If MODE neither -6, 0 nor 6, the diagonal is scaled by */ +/* DMAX / max(abs(D(i))), so that maximum absolute entry */ +/* of diagonal is abs(DMAX). If DMAX is negative (or zero), */ +/* diagonal will be scaled by a negative number (or zero). */ + +/* RSIGN - CHARACTER*1 */ +/* If MODE neither -6, 0 nor 6, specifies sign of diagonal */ +/* as follows: */ +/* 'T' => diagonal entries are multiplied by 1 or -1 */ +/* with probability .5 */ +/* 'F' => diagonal unchanged */ +/* Not modified. */ + +/* GRADE - CHARACTER*1 */ +/* Specifies grading of matrix as follows: */ +/* 'N' => no grading */ +/* 'L' => matrix premultiplied by diag( DL ) */ +/* (only if matrix nonsymmetric) */ +/* 'R' => matrix postmultiplied by diag( DR ) */ +/* (only if matrix nonsymmetric) */ +/* 'B' => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by diag( DR ) */ +/* (only if matrix nonsymmetric) */ +/* 'S' or 'H' => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by diag( DL ) */ +/* ('S' for symmetric, or 'H' for Hermitian) */ +/* 'E' => matrix premultiplied by diag( DL ) and */ +/* postmultiplied by inv( diag( DL ) ) */ +/* ( 'E' for eigenvalue invariance) */ +/* (only if matrix nonsymmetric) */ +/* Note: if GRADE='E', then M must equal N. */ +/* Not modified. */ + +/* DL - DOUBLE PRECISION array, dimension (M) */ +/* If MODEL=0, then on entry this array specifies the diagonal */ +/* entries of a diagonal matrix used as described under GRADE */ +/* above. If MODEL is not zero, then DL will be set according */ +/* to MODEL and CONDL, analogous to the way D is set according */ +/* to MODE and COND (except there is no DMAX parameter for DL). */ +/* If GRADE='E', then DL cannot have zero entries. */ +/* Not referenced if GRADE = 'N' or 'R'. Changed on exit. */ + +/* MODEL - INTEGER */ +/* This specifies how the diagonal array DL is to be computed, */ +/* just as MODE specifies how D is to be computed. */ +/* Not modified. */ + +/* CONDL - DOUBLE PRECISION */ +/* When MODEL is not zero, this specifies the condition number */ +/* of the computed DL. Not modified. */ + +/* DR - DOUBLE PRECISION array, dimension (N) */ +/* If MODER=0, then on entry this array specifies the diagonal */ +/* entries of a diagonal matrix used as described under GRADE */ +/* above. If MODER is not zero, then DR will be set according */ +/* to MODER and CONDR, analogous to the way D is set according */ +/* to MODE and COND (except there is no DMAX parameter for DR). */ +/* Not referenced if GRADE = 'N', 'L', 'H', 'S' or 'E'. */ +/* Changed on exit. */ + +/* MODER - INTEGER */ +/* This specifies how the diagonal array DR is to be computed, */ +/* just as MODE specifies how D is to be computed. */ +/* Not modified. */ + +/* CONDR - DOUBLE PRECISION */ +/* When MODER is not zero, this specifies the condition number */ +/* of the computed DR. Not modified. */ + +/* PIVTNG - CHARACTER*1 */ +/* On entry specifies pivoting permutations as follows: */ +/* 'N' or ' ' => none. */ +/* 'L' => left or row pivoting (matrix must be nonsymmetric). */ +/* 'R' => right or column pivoting (matrix must be */ +/* nonsymmetric). */ +/* 'B' or 'F' => both or full pivoting, i.e., on both sides. */ +/* In this case, M must equal N */ + +/* If two calls to DLATMR both have full bandwidth (KL = M-1 */ +/* and KU = N-1), and differ only in the PIVTNG and PACK */ +/* parameters, then the matrices generated will differ only */ +/* in the order of the rows and/or columns, and otherwise */ +/* contain the same data. This consistency cannot be */ +/* maintained with less than full bandwidth. */ + +/* IPIVOT - INTEGER array, dimension (N or M) */ +/* This array specifies the permutation used. After the */ +/* basic matrix is generated, the rows, columns, or both */ +/* are permuted. If, say, row pivoting is selected, DLATMR */ +/* starts with the *last* row and interchanges the M-th and */ +/* IPIVOT(M)-th rows, then moves to the next-to-last row, */ +/* interchanging the (M-1)-th and the IPIVOT(M-1)-th rows, */ +/* and so on. In terms of "2-cycles", the permutation is */ +/* (1 IPIVOT(1)) (2 IPIVOT(2)) ... (M IPIVOT(M)) */ +/* where the rightmost cycle is applied first. This is the */ +/* *inverse* of the effect of pivoting in LINPACK. The idea */ +/* is that factoring (with pivoting) an identity matrix */ +/* which has been inverse-pivoted in this way should */ +/* result in a pivot vector identical to IPIVOT. */ +/* Not referenced if PIVTNG = 'N'. Not modified. */ + +/* SPARSE - DOUBLE PRECISION */ +/* On entry specifies the sparsity of the matrix if a sparse */ +/* matrix is to be generated. SPARSE should lie between */ +/* 0 and 1. To generate a sparse matrix, for each matrix entry */ +/* a uniform ( 0, 1 ) random number x is generated and */ +/* compared to SPARSE; if x is larger the matrix entry */ +/* is unchanged and if x is smaller the entry is set */ +/* to zero. Thus on the average a fraction SPARSE of the */ +/* entries will be set to zero. */ +/* Not modified. */ + +/* KL - INTEGER */ +/* On entry specifies the lower bandwidth of the matrix. For */ +/* example, KL=0 implies upper triangular, KL=1 implies upper */ +/* Hessenberg, and KL at least M-1 implies the matrix is not */ +/* banded. Must equal KU if matrix is symmetric. */ +/* Not modified. */ + +/* KU - INTEGER */ +/* On entry specifies the upper bandwidth of the matrix. For */ +/* example, KU=0 implies lower triangular, KU=1 implies lower */ +/* Hessenberg, and KU at least N-1 implies the matrix is not */ +/* banded. Must equal KL if matrix is symmetric. */ +/* Not modified. */ + +/* ANORM - DOUBLE PRECISION */ +/* On entry specifies maximum entry of output matrix */ +/* (output matrix will by multiplied by a constant so that */ +/* its largest absolute entry equal ANORM) */ +/* if ANORM is nonnegative. If ANORM is negative no scaling */ +/* is done. Not modified. */ + +/* PACK - CHARACTER*1 */ +/* On entry specifies packing of matrix as follows: */ +/* 'N' => no packing */ +/* 'U' => zero out all subdiagonal entries (if symmetric) */ +/* 'L' => zero out all superdiagonal entries (if symmetric) */ +/* 'C' => store the upper triangle columnwise */ +/* (only if matrix symmetric or square upper triangular) */ +/* 'R' => store the lower triangle columnwise */ +/* (only if matrix symmetric or square lower triangular) */ +/* (same as upper half rowwise if symmetric) */ +/* 'B' => store the lower triangle in band storage scheme */ +/* (only if matrix symmetric) */ +/* 'Q' => store the upper triangle in band storage scheme */ +/* (only if matrix symmetric) */ +/* 'Z' => store the entire matrix in band storage scheme */ +/* (pivoting can be provided for by using this */ +/* option to store A in the trailing rows of */ +/* the allocated storage) */ + +/* Using these options, the various LAPACK packed and banded */ +/* storage schemes can be obtained: */ +/* GB - use 'Z' */ +/* PB, SB or TB - use 'B' or 'Q' */ +/* PP, SP or TP - use 'C' or 'R' */ + +/* If two calls to DLATMR differ only in the PACK parameter, */ +/* they will generate mathematically equivalent matrices. */ +/* Not modified. */ + +/* A - DOUBLE PRECISION array, dimension (LDA,N) */ +/* On exit A is the desired test matrix. Only those */ +/* entries of A which are significant on output */ +/* will be referenced (even if A is in packed or band */ +/* storage format). The 'unoccupied corners' of A in */ +/* band format will be zeroed out. */ + +/* LDA - INTEGER */ +/* on entry LDA specifies the first dimension of A as */ +/* declared in the calling program. */ +/* If PACK='N', 'U' or 'L', LDA must be at least max ( 1, M ). */ +/* If PACK='C' or 'R', LDA must be at least 1. */ +/* If PACK='B', or 'Q', LDA must be MIN ( KU+1, N ) */ +/* If PACK='Z', LDA must be at least KUU+KLL+1, where */ +/* KUU = MIN ( KU, N-1 ) and KLL = MIN ( KL, N-1 ) */ +/* Not modified. */ + +/* IWORK - INTEGER array, dimension ( N or M) */ +/* Workspace. Not referenced if PIVTNG = 'N'. Changed on exit. */ + +/* INFO - INTEGER */ +/* Error parameter on exit: */ +/* 0 => normal return */ +/* -1 => M negative or unequal to N and SYM='S' or 'H' */ +/* -2 => N negative */ +/* -3 => DIST illegal string */ +/* -5 => SYM illegal string */ +/* -7 => MODE not in range -6 to 6 */ +/* -8 => COND less than 1.0, and MODE neither -6, 0 nor 6 */ +/* -10 => MODE neither -6, 0 nor 6 and RSIGN illegal string */ +/* -11 => GRADE illegal string, or GRADE='E' and */ +/* M not equal to N, or GRADE='L', 'R', 'B' or 'E' and */ +/* SYM = 'S' or 'H' */ +/* -12 => GRADE = 'E' and DL contains zero */ +/* -13 => MODEL not in range -6 to 6 and GRADE= 'L', 'B', 'H', */ +/* 'S' or 'E' */ +/* -14 => CONDL less than 1.0, GRADE='L', 'B', 'H', 'S' or 'E', */ +/* and MODEL neither -6, 0 nor 6 */ +/* -16 => MODER not in range -6 to 6 and GRADE= 'R' or 'B' */ +/* -17 => CONDR less than 1.0, GRADE='R' or 'B', and */ +/* MODER neither -6, 0 nor 6 */ +/* -18 => PIVTNG illegal string, or PIVTNG='B' or 'F' and */ +/* M not equal to N, or PIVTNG='L' or 'R' and SYM='S' */ +/* or 'H' */ +/* -19 => IPIVOT contains out of range number and */ +/* PIVTNG not equal to 'N' */ +/* -20 => KL negative */ +/* -21 => KU negative, or SYM='S' or 'H' and KU not equal to KL */ +/* -22 => SPARSE not in range 0. to 1. */ +/* -24 => PACK illegal string, or PACK='U', 'L', 'B' or 'Q' */ +/* and SYM='N', or PACK='C' and SYM='N' and either KL */ +/* not equal to 0 or N not equal to M, or PACK='R' and */ +/* SYM='N', and either KU not equal to 0 or N not equal */ +/* to M */ +/* -26 => LDA too small */ +/* 1 => Error return from DLATM1 (computing D) */ +/* 2 => Cannot scale diagonal to DMAX (max. entry is 0) */ +/* 3 => Error return from DLATM1 (computing DL) */ +/* 4 => Error return from DLATM1 (computing DR) */ +/* 5 => ANORM is positive, but matrix constructed prior to */ +/* attempting to scale it to have norm ANORM, is zero */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. Local Arrays .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* 1) Decode and Test the input parameters. */ +/* Initialize flags & seed. */ + + /* Parameter adjustments */ + --iseed; + --d__; + --dl; + --dr; + --ipivot; + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --iwork; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*m == 0 || *n == 0) { + return 0; + } + +/* Decode DIST */ + + if (lsame_(dist, "U")) { + idist = 1; + } else if (lsame_(dist, "S")) { + idist = 2; + } else if (lsame_(dist, "N")) { + idist = 3; + } else { + idist = -1; + } + +/* Decode SYM */ + + if (lsame_(sym, "S")) { + isym = 0; + } else if (lsame_(sym, "N")) { + isym = 1; + } else if (lsame_(sym, "H")) { + isym = 0; + } else { + isym = -1; + } + +/* Decode RSIGN */ + + if (lsame_(rsign, "F")) { + irsign = 0; + } else if (lsame_(rsign, "T")) { + irsign = 1; + } else { + irsign = -1; + } + +/* Decode PIVTNG */ + + if (lsame_(pivtng, "N")) { + ipvtng = 0; + } else if (lsame_(pivtng, " ")) { + ipvtng = 0; + } else if (lsame_(pivtng, "L")) { + ipvtng = 1; + npvts = *m; + } else if (lsame_(pivtng, "R")) { + ipvtng = 2; + npvts = *n; + } else if (lsame_(pivtng, "B")) { + ipvtng = 3; + npvts = min(*n,*m); + } else if (lsame_(pivtng, "F")) { + ipvtng = 3; + npvts = min(*n,*m); + } else { + ipvtng = -1; + } + +/* Decode GRADE */ + + if (lsame_(grade, "N")) { + igrade = 0; + } else if (lsame_(grade, "L")) { + igrade = 1; + } else if (lsame_(grade, "R")) { + igrade = 2; + } else if (lsame_(grade, "B")) { + igrade = 3; + } else if (lsame_(grade, "E")) { + igrade = 4; + } else if (lsame_(grade, "H") || lsame_(grade, + "S")) { + igrade = 5; + } else { + igrade = -1; + } + +/* Decode PACK */ + + if (lsame_(pack, "N")) { + ipack = 0; + } else if (lsame_(pack, "U")) { + ipack = 1; + } else if (lsame_(pack, "L")) { + ipack = 2; + } else if (lsame_(pack, "C")) { + ipack = 3; + } else if (lsame_(pack, "R")) { + ipack = 4; + } else if (lsame_(pack, "B")) { + ipack = 5; + } else if (lsame_(pack, "Q")) { + ipack = 6; + } else if (lsame_(pack, "Z")) { + ipack = 7; + } else { + ipack = -1; + } + +/* Set certain internal parameters */ + + mnmin = min(*m,*n); +/* Computing MIN */ + i__1 = *kl, i__2 = *m - 1; + kll = min(i__1,i__2); +/* Computing MIN */ + i__1 = *ku, i__2 = *n - 1; + kuu = min(i__1,i__2); + +/* If inv(DL) is used, check to see if DL has a zero entry. */ + + dzero = FALSE_; + if (igrade == 4 && *model == 0) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (dl[i__] == 0.) { + dzero = TRUE_; + } +/* L10: */ + } + } + +/* Check values in IPIVOT */ + + badpvt = FALSE_; + if (ipvtng > 0) { + i__1 = npvts; + for (j = 1; j <= i__1; ++j) { + if (ipivot[j] <= 0 || ipivot[j] > npvts) { + badpvt = TRUE_; + } +/* L20: */ + } + } + +/* Set INFO if an error */ + + if (*m < 0) { + *info = -1; + } else if (*m != *n && isym == 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (idist == -1) { + *info = -3; + } else if (isym == -1) { + *info = -5; + } else if (*mode < -6 || *mode > 6) { + *info = -7; + } else if (*mode != -6 && *mode != 0 && *mode != 6 && *cond < 1.) { + *info = -8; + } else if (*mode != -6 && *mode != 0 && *mode != 6 && irsign == -1) { + *info = -10; + } else if (igrade == -1 || igrade == 4 && *m != *n || igrade >= 1 && + igrade <= 4 && isym == 0) { + *info = -11; + } else if (igrade == 4 && dzero) { + *info = -12; + } else if ((igrade == 1 || igrade == 3 || igrade == 4 || igrade == 5) && ( + *model < -6 || *model > 6)) { + *info = -13; + } else if ((igrade == 1 || igrade == 3 || igrade == 4 || igrade == 5) && ( + *model != -6 && *model != 0 && *model != 6) && *condl < 1.) { + *info = -14; + } else if ((igrade == 2 || igrade == 3) && (*moder < -6 || *moder > 6)) { + *info = -16; + } else if ((igrade == 2 || igrade == 3) && (*moder != -6 && *moder != 0 && + *moder != 6) && *condr < 1.) { + *info = -17; + } else if (ipvtng == -1 || ipvtng == 3 && *m != *n || (ipvtng == 1 || + ipvtng == 2) && isym == 0) { + *info = -18; + } else if (ipvtng != 0 && badpvt) { + *info = -19; + } else if (*kl < 0) { + *info = -20; + } else if (*ku < 0 || isym == 0 && *kl != *ku) { + *info = -21; + } else if (*sparse < 0. || *sparse > 1.) { + *info = -22; + } else if (ipack == -1 || (ipack == 1 || ipack == 2 || ipack == 5 || + ipack == 6) && isym == 1 || ipack == 3 && isym == 1 && (*kl != 0 + || *m != *n) || ipack == 4 && isym == 1 && (*ku != 0 || *m != *n)) + { + *info = -24; + } else if ((ipack == 0 || ipack == 1 || ipack == 2) && *lda < max(1,*m) || + (ipack == 3 || ipack == 4) && *lda < 1 || (ipack == 5 || ipack == + 6) && *lda < kuu + 1 || ipack == 7 && *lda < kll + kuu + 1) { + *info = -26; + } + + if (*info != 0) { + i__1 = -(*info); + xerbla_("DLATMR", &i__1); + return 0; + } + +/* Decide if we can pivot consistently */ + + fulbnd = FALSE_; + if (kuu == *n - 1 && kll == *m - 1) { + fulbnd = TRUE_; + } + +/* Initialize random number generator */ + + for (i__ = 1; i__ <= 4; ++i__) { + iseed[i__] = (i__1 = iseed[i__], abs(i__1)) % 4096; +/* L30: */ + } + + iseed[4] = (iseed[4] / 2 << 1) + 1; + +/* 2) Set up D, DL, and DR, if indicated. */ + +/* Compute D according to COND and MODE */ + + dlatm1_(mode, cond, &irsign, &idist, &iseed[1], &d__[1], &mnmin, info); + if (*info != 0) { + *info = 1; + return 0; + } + if (*mode != 0 && *mode != -6 && *mode != 6) { + +/* Scale by DMAX */ + + temp = abs(d__[1]); + i__1 = mnmin; + for (i__ = 2; i__ <= i__1; ++i__) { +/* Computing MAX */ + d__2 = temp, d__3 = (d__1 = d__[i__], abs(d__1)); + temp = max(d__2,d__3); +/* L40: */ + } + if (temp == 0. && *dmax__ != 0.) { + *info = 2; + return 0; + } + if (temp != 0.) { + alpha = *dmax__ / temp; + } else { + alpha = 1.; + } + i__1 = mnmin; + for (i__ = 1; i__ <= i__1; ++i__) { + d__[i__] = alpha * d__[i__]; +/* L50: */ + } + + } + +/* Compute DL if grading set */ + + if (igrade == 1 || igrade == 3 || igrade == 4 || igrade == 5) { + dlatm1_(model, condl, &c__0, &idist, &iseed[1], &dl[1], m, info); + if (*info != 0) { + *info = 3; + return 0; + } + } + +/* Compute DR if grading set */ + + if (igrade == 2 || igrade == 3) { + dlatm1_(moder, condr, &c__0, &idist, &iseed[1], &dr[1], n, info); + if (*info != 0) { + *info = 4; + return 0; + } + } + +/* 3) Generate IWORK if pivoting */ + + if (ipvtng > 0) { + i__1 = npvts; + for (i__ = 1; i__ <= i__1; ++i__) { + iwork[i__] = i__; +/* L60: */ + } + if (fulbnd) { + i__1 = npvts; + for (i__ = 1; i__ <= i__1; ++i__) { + k = ipivot[i__]; + j = iwork[i__]; + iwork[i__] = iwork[k]; + iwork[k] = j; +/* L70: */ + } + } else { + for (i__ = npvts; i__ >= 1; --i__) { + k = ipivot[i__]; + j = iwork[i__]; + iwork[i__] = iwork[k]; + iwork[k] = j; +/* L80: */ + } + } + } + +/* 4) Generate matrices for each kind of PACKing */ +/* Always sweep matrix columnwise (if symmetric, upper */ +/* half only) so that matrix generated does not depend */ +/* on PACK */ + + if (fulbnd) { + +/* Use DLATM3 so matrices generated with differing PIVOTing only */ +/* differ only in the order of their rows and/or columns. */ + + if (ipack == 0) { + if (isym == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); + a[isub + jsub * a_dim1] = temp; + a[jsub + isub * a_dim1] = temp; +/* L90: */ + } +/* L100: */ + } + } else if (isym == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); + a[isub + jsub * a_dim1] = temp; +/* L110: */ + } +/* L120: */ + } + } + + } else if (ipack == 1) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + a[mnsub + mxsub * a_dim1] = temp; + if (mnsub != mxsub) { + a[mxsub + mnsub * a_dim1] = 0.; + } +/* L130: */ + } +/* L140: */ + } + + } else if (ipack == 2) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + a[mxsub + mnsub * a_dim1] = temp; + if (mnsub != mxsub) { + a[mnsub + mxsub * a_dim1] = 0.; + } +/* L150: */ + } +/* L160: */ + } + + } else if (ipack == 3) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + +/* Compute K = location of (ISUB,JSUB) entry in packed */ +/* array */ + + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + k = mxsub * (mxsub - 1) / 2 + mnsub; + +/* Convert K to (IISUB,JJSUB) location */ + + jjsub = (k - 1) / *lda + 1; + iisub = k - *lda * (jjsub - 1); + + a[iisub + jjsub * a_dim1] = temp; +/* L170: */ + } +/* L180: */ + } + + } else if (ipack == 4) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + +/* Compute K = location of (I,J) entry in packed array */ + + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + if (mnsub == 1) { + k = mxsub; + } else { + k = *n * (*n + 1) / 2 - (*n - mnsub + 1) * (*n - + mnsub + 2) / 2 + mxsub - mnsub + 1; + } + +/* Convert K to (IISUB,JJSUB) location */ + + jjsub = (k - 1) / *lda + 1; + iisub = k - *lda * (jjsub - 1); + + a[iisub + jjsub * a_dim1] = temp; +/* L190: */ + } +/* L200: */ + } + + } else if (ipack == 5) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + if (i__ < 1) { + a[j - i__ + 1 + (i__ + *n) * a_dim1] = 0.; + } else { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + a[mxsub - mnsub + 1 + mnsub * a_dim1] = temp; + } +/* L210: */ + } +/* L220: */ + } + + } else if (ipack == 6) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + a[mnsub - mxsub + kuu + 1 + mxsub * a_dim1] = temp; +/* L230: */ + } +/* L240: */ + } + + } else if (ipack == 7) { + + if (isym == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); + mnsub = min(isub,jsub); + mxsub = max(isub,jsub); + a[mnsub - mxsub + kuu + 1 + mxsub * a_dim1] = temp; + if (i__ < 1) { + a[j - i__ + 1 + kuu + (i__ + *n) * a_dim1] = 0.; + } + if (i__ >= 1 && mnsub != mxsub) { + a[mxsub - mnsub + 1 + kuu + mnsub * a_dim1] = + temp; + } +/* L250: */ + } +/* L260: */ + } + } else if (isym == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + kll; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + temp = dlatm3_(m, n, &i__, &j, &isub, &jsub, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); + a[isub - jsub + kuu + 1 + jsub * a_dim1] = temp; +/* L270: */ + } +/* L280: */ + } + } + + } + + } else { + +/* Use DLATM2 */ + + if (ipack == 0) { + if (isym == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, + &idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); + a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; +/* L290: */ + } +/* L300: */ + } + } else if (isym == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, + &idist, &iseed[1], &d__[1], &igrade, &dl[1], & + dr[1], &ipvtng, &iwork[1], sparse); +/* L310: */ + } +/* L320: */ + } + } + + } else if (ipack == 1) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[i__ + j * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + if (i__ != j) { + a[j + i__ * a_dim1] = 0.; + } +/* L330: */ + } +/* L340: */ + } + + } else if (ipack == 2) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, & + idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[1] +, &ipvtng, &iwork[1], sparse); + if (i__ != j) { + a[i__ + j * a_dim1] = 0.; + } +/* L350: */ + } +/* L360: */ + } + + } else if (ipack == 3) { + + isub = 0; + jsub = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + ++isub; + if (isub > *lda) { + isub = 1; + ++jsub; + } + a[isub + jsub * a_dim1] = dlatm2_(m, n, &i__, &j, kl, ku, + &idist, &iseed[1], &d__[1], &igrade, &dl[1], &dr[ + 1], &ipvtng, &iwork[1], sparse); +/* L370: */ + } +/* L380: */ + } + + } else if (ipack == 4) { + + if (isym == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + +/* Compute K = location of (I,J) entry in packed array */ + + if (i__ == 1) { + k = j; + } else { + k = *n * (*n + 1) / 2 - (*n - i__ + 1) * (*n - + i__ + 2) / 2 + j - i__ + 1; + } + +/* Convert K to (ISUB,JSUB) location */ + + jsub = (k - 1) / *lda + 1; + isub = k - *lda * (jsub - 1); + + a[isub + jsub * a_dim1] = dlatm2_(m, n, &i__, &j, kl, + ku, &idist, &iseed[1], &d__[1], &igrade, &dl[ + 1], &dr[1], &ipvtng, &iwork[1], sparse); +/* L390: */ + } +/* L400: */ + } + } else { + isub = 0; + jsub = 1; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = j; i__ <= i__2; ++i__) { + ++isub; + if (isub > *lda) { + isub = 1; + ++jsub; + } + a[isub + jsub * a_dim1] = dlatm2_(m, n, &i__, &j, kl, + ku, &idist, &iseed[1], &d__[1], &igrade, &dl[ + 1], &dr[1], &ipvtng, &iwork[1], sparse); +/* L410: */ + } +/* L420: */ + } + } + + } else if (ipack == 5) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + if (i__ < 1) { + a[j - i__ + 1 + (i__ + *n) * a_dim1] = 0.; + } else { + a[j - i__ + 1 + i__ * a_dim1] = dlatm2_(m, n, &i__, & + j, kl, ku, &idist, &iseed[1], &d__[1], & + igrade, &dl[1], &dr[1], &ipvtng, &iwork[1], + sparse); + } +/* L430: */ + } +/* L440: */ + } + + } else if (ipack == 6) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + a[i__ - j + kuu + 1 + j * a_dim1] = dlatm2_(m, n, &i__, & + j, kl, ku, &idist, &iseed[1], &d__[1], &igrade, & + dl[1], &dr[1], &ipvtng, &iwork[1], sparse); +/* L450: */ + } +/* L460: */ + } + + } else if (ipack == 7) { + + if (isym == 0) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + a[i__ - j + kuu + 1 + j * a_dim1] = dlatm2_(m, n, & + i__, &j, kl, ku, &idist, &iseed[1], &d__[1], & + igrade, &dl[1], &dr[1], &ipvtng, &iwork[1], + sparse); + if (i__ < 1) { + a[j - i__ + 1 + kuu + (i__ + *n) * a_dim1] = 0.; + } + if (i__ >= 1 && i__ != j) { + a[j - i__ + 1 + kuu + i__ * a_dim1] = a[i__ - j + + kuu + 1 + j * a_dim1]; + } +/* L470: */ + } +/* L480: */ + } + } else if (isym == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j + kll; + for (i__ = j - kuu; i__ <= i__2; ++i__) { + a[i__ - j + kuu + 1 + j * a_dim1] = dlatm2_(m, n, & + i__, &j, kl, ku, &idist, &iseed[1], &d__[1], & + igrade, &dl[1], &dr[1], &ipvtng, &iwork[1], + sparse); +/* L490: */ + } +/* L500: */ + } + } + + } + + } + +/* 5) Scaling the norm */ + + if (ipack == 0) { + onorm = dlange_("M", m, n, &a[a_offset], lda, tempa); + } else if (ipack == 1) { + onorm = dlansy_("M", "U", n, &a[a_offset], lda, tempa); + } else if (ipack == 2) { + onorm = dlansy_("M", "L", n, &a[a_offset], lda, tempa); + } else if (ipack == 3) { + onorm = dlansp_("M", "U", n, &a[a_offset], tempa); + } else if (ipack == 4) { + onorm = dlansp_("M", "L", n, &a[a_offset], tempa); + } else if (ipack == 5) { + onorm = dlansb_("M", "L", n, &kll, &a[a_offset], lda, tempa); + } else if (ipack == 6) { + onorm = dlansb_("M", "U", n, &kuu, &a[a_offset], lda, tempa); + } else if (ipack == 7) { + onorm = dlangb_("M", n, &kll, &kuu, &a[a_offset], lda, tempa); + } + + if (*anorm >= 0.) { + + if (*anorm > 0. && onorm == 0.) { + +/* Desired scaling impossible */ + + *info = 5; + return 0; + + } else if (*anorm > 1. && onorm < 1. || *anorm < 1. && onorm > 1.) { + +/* Scale carefully to avoid over / underflow */ + + if (ipack <= 2) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + d__1 = 1. / onorm; + dscal_(m, &d__1, &a[j * a_dim1 + 1], &c__1); + dscal_(m, anorm, &a[j * a_dim1 + 1], &c__1); +/* L510: */ + } + + } else if (ipack == 3 || ipack == 4) { + + i__1 = *n * (*n + 1) / 2; + d__1 = 1. / onorm; + dscal_(&i__1, &d__1, &a[a_offset], &c__1); + i__1 = *n * (*n + 1) / 2; + dscal_(&i__1, anorm, &a[a_offset], &c__1); + + } else if (ipack >= 5) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = kll + kuu + 1; + d__1 = 1. / onorm; + dscal_(&i__2, &d__1, &a[j * a_dim1 + 1], &c__1); + i__2 = kll + kuu + 1; + dscal_(&i__2, anorm, &a[j * a_dim1 + 1], &c__1); +/* L520: */ + } + + } + + } else { + +/* Scale straightforwardly */ + + if (ipack <= 2) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + d__1 = *anorm / onorm; + dscal_(m, &d__1, &a[j * a_dim1 + 1], &c__1); +/* L530: */ + } + + } else if (ipack == 3 || ipack == 4) { + + i__1 = *n * (*n + 1) / 2; + d__1 = *anorm / onorm; + dscal_(&i__1, &d__1, &a[a_offset], &c__1); + + } else if (ipack >= 5) { + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = kll + kuu + 1; + d__1 = *anorm / onorm; + dscal_(&i__2, &d__1, &a[j * a_dim1 + 1], &c__1); +/* L540: */ + } + } + + } + + } + +/* End of DLATMR */ + + return 0; +} /* dlatmr_ */ diff --git a/examples/eig/main.c b/examples/eig/main.c new file mode 100644 index 0000000..34034e0 --- /dev/null +++ b/examples/eig/main.c @@ -0,0 +1,459 @@ +#include +#include +#include +#include +#include +#include +#ifdef __cplusplus +extern "C" { +#endif +#include "cblas.h" +#ifdef __cplusplus +} +#endif + +/*----------------------------------------------------------------------------- +* Timing Setup +*----------------------------------------------------------------------------*/ +struct timespec t0,t1; +#define tick() clock_gettime(CLOCK_MONOTONIC, &t0); +#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \ + t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9) + +/* Auxiliary routines prototypes */ +void print_eigenvalues(char * desc, int n, double * vector_r, double * vector_i); +void print_eigenvectors(char * desc, int n, double * vector_i, double * v, int ldv); +void form_diag_matrix(double complex *matrix_diag, double *vector_r, double *vector_i, int n); +void form_eigen_matrix(double complex *mat, int n, double *vector_i, double *v, int ldv); +void print_real_matrix(char * desc, double * mat, int n, int m, int ldv); +void print_complex_matrix(char * desc, double complex *mat, int n, int m, int ldv); +double compute_evd_error(double complex *matrix_reconstruct, double *matrix_original, int n, int ld); +void print_performance(char * func_name, int *matrix_sizes, float *time, int num_test, int num_run); + +/* LAPACK routines */ +extern int zlacpy_(char *, int *, int *, double complex *, int *, + double complex *, int *); +extern int dlatmr_(int *, int *, char *, int *, char *, + double *, int *, double *, double *, char *, char *, + double *, int *, double *, double *, int *, + double *, char *, int *, int *, int *, + double *, double *, char *, double *, int *, + int *, int *); +extern int zgetrf_(int *, int *, double complex *, int *, int *, int *); +extern int dgeev_ (char *, char *, int *, double *, + int *, double *, double *, double *, int *, + double *, int *, double *, int *, int *); +extern int zgetri_(int *, double complex *, int *, int *, + double complex *, int *, int *); +extern int zgemm_(char *, char *, int *, int *, int *, double complex *, double complex *, + int *, double complex *, int *, double complex *, double complex *, int *); + + +/* Parameters */ +#define MAX_ALLOWED_ERR (1e-10) + +/* Main program */ + +int print_data; + +int main(int argc, char *argv[]) +{ + /* Locals */ + float time, time_ev, time_cinv, *time_ev_all_runs, *time_cinv_all_runs; + int num_test, num_run, i, j, n, n_min, n_inc; + int lda, ldl, ldr, info, lwork; + double wkopt; + double* work; + int *vector_pivot, *vector_work; + double max_error_evd; + double *matrix_A, *matrix_A_cpy, *matrix_R, *matrix_L; + double *vector_diag, *vector_diagl, *vector_diagr, *vector_r, *vector_i; + double complex *matrix_D, *matrix_P, *matrix_invP, *matrix_A_cmplx, *work_dcomplex, lwork_opt; + int *matrix_sizes; + int const_int6 = 6; + double const_dbr1 = 1.; + double const_dbr0 = 0.; + double complex const_dbc1 = 1. + 0.*I; + double complex const_dbc0 = 0. + 0.*I; + int rand_seed[4] = {10,123,278,3579}; /* numbers must lie between 0 and 4095 */ + + fprintf(stdout, "\n\n======= LAPACK example: eigen decomposition =======\n"); + if(argc == 1) { /* no command line arguments, use default */ + num_test = 3; + n_min = 100; + n_inc = 100; + num_run = 1; + print_data = 0; + } + else if (argc < 6) { + printf("Usage: ludinv \n"); + exit(0); + } + else { + num_test = atoi(argv[1]); + n_min = atoi(argv[2]); + n_inc = atoi(argv[3]); + num_run = atoi(argv[4]); + print_data = atoi(argv[5]); + } + rand_seed[0] = 4675; + + matrix_sizes = (int*)malloc(sizeof(int)*num_test); + time_ev_all_runs = (float *)malloc(sizeof(float)*num_test*num_run); + time_cinv_all_runs = (float *)malloc(sizeof(float)*num_test*num_run); + + for(i=0, n=n_min; i 0 ) { + printf("The eigen solution algorithm failed to compute eigenvalues."); + printf("To generate a new matrix to test the algorithm.\n"); + goto START_NEW_RUN; + } + + printf("Eigen-decomposition finished successfully.\n"); + print_eigenvalues( "Eigenvalues", n, vector_r, vector_i ); + print_eigenvectors( "Right eigenvectors", n, vector_i, matrix_R, ldr ); + + /* Form matrix P using right eigenvectors as the columns. */ + form_eigen_matrix(matrix_P, n, vector_i, matrix_R, ldr); + print_complex_matrix("Eigenvector matrix P:", matrix_P, n, n, n); + + /* Form matrix D using eigenvalues as the diagonal elements. */ + form_diag_matrix(matrix_D, vector_r, vector_i, n); + print_complex_matrix("Complex diagonal matrix D: ", matrix_D, n, n, n); + + /* Compute inverse of P: first perform LU decomposition on P. */ + printf("Compute inverse of P such that A = PDP^-1.\n"); + + zlacpy_("Full", &n, &n, matrix_P, &n, matrix_invP, &n); + + tick(); + zgetrf_(&n, &n, matrix_invP, &n, vector_pivot, &info); + time_cinv = tock(); /* time for complex matrix inversion */ + + if( info != 0 ) { + printf("Return error value of zgetrf_(): %d.\n", info); + printf("The LU decomposition failed."); + printf("To generate a new matrix to test the algorithm.\n"); + goto START_NEW_RUN; + } + + print_complex_matrix("matrix P LU decomposition results", matrix_invP, n, n, n); + + tick(); + + /* Query and allocate the optimal workspace for matrix inversion + based on LU decompostion. */ + lwork = -1; + zgetri_(&n, matrix_invP, &n, vector_pivot, &lwork_opt, &lwork, &info); + + lwork = (int)creal(lwork_opt); /* get optimal workspace size returned by zgetri_() */ + work_dcomplex = (double complex *)malloc( lwork*sizeof(double complex) ); + + /* Compute inverse of P based on LUD results. */ + zgetri_(&n, matrix_invP, &n, vector_pivot, work_dcomplex, &lwork, &info); + free( (void*)work_dcomplex ); + + time_cinv += tock(); + time_cinv_all_runs[i*num_run+j] = time_cinv; + + if( info != 0 ) { + printf("Return error value of zgetri_(): %d.\n", info); + printf("Matrix P inversion through LU decomposition failed."); + printf("To generate a new matrix to test the algorithm.\n"); + goto START_NEW_RUN; + } + printf("Matrix P inversion finished successfully."); + print_complex_matrix("Inverse of eigenvector matrix P:", matrix_invP, n, n, n); + + /* Compute P*D*inv(P). */ + work_dcomplex = (double complex *)malloc(sizeof(double complex)*n*n); + zgemm_("N", "N", &n, &n, &n, &const_dbc1, matrix_P, &n, matrix_D, &n, &const_dbc0, work_dcomplex, &n); + //EDMA_Free(); + + matrix_A_cmplx = matrix_P; /* reuse memory */ + zgemm_("N", "N", &n, &n, &n, &const_dbc1, work_dcomplex, &n, matrix_invP, &n, &const_dbc0, matrix_A_cmplx, &n); + print_complex_matrix("P*D*inv(P):", matrix_A_cmplx, n, n, n); + print_real_matrix("Original matrix A", matrix_A_cpy, n, n, n); + + /* Compute error: A-P*D*inv(P) */ + max_error_evd = compute_evd_error(matrix_A_cmplx, matrix_A_cpy, n, lda); + + printf("\nMaximum relative error of EVD: %e\n", max_error_evd); + + if(max_error_evd > MAX_ALLOWED_ERR) { + printf("Error is out of tolerance range.\n"); + exit(-1); + } + + free( (void*)work_dcomplex ); + + j++; + +START_NEW_RUN: + printf("Current run finished. \n"); + + /* Free workspace */ + __free_ddr((void *)matrix_A); + __free_ddr((void *)vector_pivot); + free((void *)vector_work ); + free((void *)vector_diag ); + free((void *)vector_diagl); + free((void *)vector_diagr); + __free_ddr((void *)vector_r); + __free_ddr((void *)vector_i); + __free_ddr((void *)matrix_L); + __free_ddr((void *)matrix_R); + free((void *)matrix_A_cpy); + __free_ddr((void *)matrix_D); + __free_ddr((void *)matrix_P); + __free_ddr((void *)matrix_invP); + } /* while(j= 0) { + printf( " %10.5f + %10.5fi", real, imag); + } + else { + printf( " %10.5f - %10.5fi", real, abs(imag)); + } + } + printf( "\n" ); + } +} + +double compute_evd_error(double complex *matrix_reconstruct, double *matrix_original, int n, int ld) +{ + int i, j; + double err_real, err_imag; + double abs_err; + double max_err = 0.; + + for(i=0; i max_err) { + max_err = abs_err; + } + } + } + + return max_err; +} + +void print_performance(char * func_name, int *matrix_sizes, float *time, int num_test, int num_run) +{ + int i, j; + float inst, min, max; + float tot, avg; + + printf("Time (second) of %s for all matrix sizes:\n", func_name); + printf("Matrix size "); + printf(" Min Max Average "); + printf("\n"); + + for(i=0; imax) max = inst; + if(inst \n"); - exit(0); - } - else { + else if (argc < 6) { + printf("Usage: ludinv \n"); + exit(0); + } + else { num_test = atoi(argv[1]); n_min = atoi(argv[2]); n_inc = atoi(argv[3]); num_run = atoi(argv[4]); print_data = atoi(argv[5]); - } + } rand_seed[0] = 4675; - + secs_all_runs = (float *)malloc(sizeof(float)*num_test*num_run); matrix_sizes = (int*)malloc(sizeof(int)*num_test); @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) { if(info != 0) { printf("Random matrix generation has a problem. Returned error value is %d. Start a new run. \n", info); //goto START_NEW_RUN; - exit(0); + exit(-1); } memcpy((void *)matrix_LUD, (void *)matrix_A, sizeof(double)*n*n); @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) { if(info != 0) { printf("LU factorization has a problem. Returned error value of dgetrf is %d. Start a new run. \n", info); //goto START_NEW_RUN; - exit(0); + exit(-1); } printf("LU decomposition finished. Time spent on LUD: %f seconds.\n", secs_lud_inv); @@ -161,7 +161,7 @@ int main(int argc, char *argv[]) { printf("Matrix inversion through LU factorization has a problem."); printf("Returned error value of dgetri is %d. Start a new run. \n", info); //goto START_NEW_RUN; - exit(0); + exit(-1); } secs_lud_inv += secs; @@ -175,6 +175,11 @@ int main(int argc, char *argv[]) { /* Compute A*inv(A) and precision error: I-A*inv(A) */ max_abs_error = compute_inverse_error(matrix_A, matrix_LUD, n, lda); printf("Maximum relative error of matrix inversion by LUD: %e\n", max_abs_error); + + if(max_abs_error > MAX_ALLOWED_ERR) { + printf("Error is out of tolerance range.\n"); + exit(-1); + } j++; @@ -193,6 +198,8 @@ START_NEW_RUN: print_performance(matrix_sizes, secs_all_runs, num_test, num_run); + printf("Passed.\n"); + exit( 0 ); } @@ -291,5 +298,4 @@ void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int nu avg = tot/(double)num_run; printf("%12f%12f%12f\n", min, max, avg); } - } diff --git a/examples/matmpy/Makefile b/examples/matmpy/Makefile index 86f9bf4..bbf05fe 100644 --- a/examples/matmpy/Makefile +++ b/examples/matmpy/Makefile @@ -8,17 +8,17 @@ $(EXE): main.o alltests: ARMtest DSPtest OPTtest ARMtest: $(EXE) - @echo "Forcing execution on ARM."; \ + @echo "Forcing BLAS level 3 execution on ARM."; \ export TI_CBLAS_OFFLOAD=000; \ ./$(EXE); DSPtest: $(EXE) - @echo "Forcing execution on DSP."; \ + @echo "Forcing BLAS level 3 execution on DSP."; \ export TI_CBLAS_OFFLOAD=001; \ ./$(EXE); OPTtest: $(EXE) - @echo "Optimal execution on ARM or DSP."; \ + @echo "Optimal BLAS level 3 execution on ARM or DSP."; \ export TI_CBLAS_OFFLOAD=002; \ ./$(EXE); diff --git a/examples/matmpy/main.c b/examples/matmpy/main.c index 993c6be..c6266e2 100644 --- a/examples/matmpy/main.c +++ b/examples/matmpy/main.c @@ -12,7 +12,7 @@ #define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \ t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9) #define fout stdout - +#define REF_CHECKSUM (1925318033.9552) double *A, *B, *C; int m, n, k; @@ -23,7 +23,6 @@ double secs = 0.0; static void report_flops(double secs, int m, int n, int k, int N) { fprintf(fout,"Total time for %d tests: %8.6fs, %5.3f Gflops\n", -// N, secs, (float)N*m*n*(2*k-1) / (secs * 1e6)); N, secs, (float)N*2*m*n*k / (secs * 1e9)); } @@ -63,7 +62,7 @@ int main() /* configuration */ m = k = n = 1000; alpha = 0.7; - beta = 1.3; + beta = 1.3; /* allocate the matrices */ A = (double *)__malloc_ddr( m*k*sizeof( double ) ); @@ -76,21 +75,17 @@ int main() __free_ddr(C); return 1; } - + srand(123456789); - /* Force BLAS execution on ARM due to insufficient MSMC memory. - This will be removed later. */ - putenv("TI_CBLAS_OFFLOAD=000"); - /* Check the environment variable that controls offloading */ ti_cblas_offload_env = getenv("TI_CBLAS_OFFLOAD"); if(ti_cblas_offload_env == NULL) { - printf("Environment variable TI_CBLAS_OFFLOAD is not defined. "); + printf("Environment variable TI_CBLAS_OFFLOAD is not defined."); printf("Use default offloading configuration:\n"); - printf("\tBLAS level 1: running on ARM.\n"); - printf("\tBLAS level 2: running on ARM.\n"); - printf("\tBLAS level 3: running on ARM or DSP based on matrix sizes.\n"); + printf("\tBLAS level 1: running on ARM.\n"); + printf("\tBLAS level 2: running on ARM.\n"); + printf("\tBLAS level 3: running on ARM or DSP based on matrix sizes.\n"); } else { printf("TI_CBLAS_OFFLOAD is defined as %s\n", ti_cblas_offload_env); @@ -105,11 +100,15 @@ int main() secs = 0; printf("Now doing %d tests after warming caches\n", numtests); - for (t=0; t