Context Navigation

source: rtems/testsuites/benchmarks/linpack/linpack-pc.c @ 8c1f4064

5

Last change on this file since 8c1f4064 was 8c1f4064, checked in by Sebastian Huber <sebastian.huber@…>, on 11/02/17 at 12:56:12

tests: Use printf() instead of fprintf()

Update #3170.
Update #3199.

Property mode set to 100644

File size: 37.5 KB

Rev	Line
[8783660]	1	/*
	2	* Linpack 100x100 Benchmark In C/C++ For PCs
	3	*
	4	********************************************************************
	5	*
	6	* Original Source from NETLIB
	7	*
	8	* Translated to C by Bonnie Toy 5/88 (modified on 2/25/94 to fix
	9	* a problem with daxpy for unequal increments or equal increments
	10	* not equal to 1. Jack Dongarra)
	11	*
	12	* To obtain rolled source BLAS, add -DROLL to the command lines.
	13	* To obtain unrolled source BLAS, add -DUNROLL to the command lines.
	14	*
	15	* You must specify one of -DSP or -DDP to compile correctly.
	16	*
	17	* You must specify one of -DROLL or -DUNROLL to compile correctly.
	18	*
	19	********************************************************************
	20	*
	21	* Changes in this version
	22	*
	23	* 1. Function prototypes are declared and function headers have
	24	* embedded parameter types to produce code for C and C++
	25	*
	26	* 2. Arrays aa and a are declared as [200200] and [200201] to
	27	* allow compilation with prototypes.
	28	*
	29	* 3. Function second changed (compiler dependent).
	30	*
	31	* 4. Timing method changed due to inaccuracy of PC clock (see below).
	32	*
	33	* 5. Additional date function included (compiler dependent).
	34	*
	35	* 6. Additional code used as a standard for a series of benchmarks:-
	36	* Automatic run time calibration rather than fixed parameters
	37	* Initial calibration with display to show linearity
	38	* Results displayed at reasonable rate for viewing (5 seconds)
	39	* Facilities for typing in details of system used etc.
	40	* Compiler details in code in case .exe files used elsewhere
	41	* Results appended to a text file (Linpack.txt)
	42	*
	43	* Roy Longbottom 101323.2241@compuserve.com 14 September 1996
	44	*
	45	************************************************************************
	46	*
	47	* Timing
	48	*
	49	* The PC timer is updated at about 18 times per second or resolution of
	50	* 0.05 to 0.06 seconds which is similar to the time taken by the main
	51	* time consuming function dgefa on a 100 MHz Pentium. Thus there is no
	52	* point in running the dgefa/dges1 combination three times as in the
	53	* original version. Main timing for the latter, in the loop run NTIMES,
	54	* executes matgen/dgefa, summing the time taken by matgen within the
	55	* loop for later deduction from the total time. On a modern PC this sum
	56	* can be based on a random selection of 0 or 0.05/0.06. This version
	57	* executes the single pass once and the main timing loop five times,
	58	* calculating the matgen overhead separately.
	59	*
	60	*************************************************************************
	61	*
	62	* Example of Output
	63	*
	64	* Rolled Double Precision Linpack Benchmark - PC Version in 'C/C++'
	65	*
	66	* Compiler Watcom C/C++ 10.5 Win 386
	67	* Optimisation -zp4 -otexan -fp5 -5r -dDP -dROLL
	68	*
	69	*
	70	* norm resid resid machep x[0]-1 x[n-1]-1
	71	* 0.4 7.41628980e-014 1.00000000e-015 -1.49880108e-014 -1.89848137e-014
	72	*
	73	*
	74	* Times are reported for matrices of order 100
	75	* 1 pass times for array with leading dimension of 201
	76	*
	77	* dgefa dgesl total Mflops unit ratio
	78	* 0.06000 0.00000 0.06000 11.44 0.1748 1.0714
	79	*
	80	*
	81	* Calculating matgen overhead
	82	*
	83	* 10 times 0.11 seconds
	84	* 20 times 0.22 seconds
	85	* 40 times 0.44 seconds
	86	* 80 times 0.87 seconds
	87	* 160 times 1.76 seconds
	88	* 320 times 3.52 seconds
	89	* 640 times 7.03 seconds
	90	*
	91	* Overhead for 1 matgen 0.01098 seconds
	92	*
	93	*
	94	* Calculating matgen/dgefa passes for 5 seconds
	95	*
	96	* 10 times 0.71 seconds
	97	* 20 times 1.38 seconds
	98	* 40 times 2.80 seconds
	99	* 80 times 5.66 seconds
	100	*
	101	* Passes used 70
	102	*
	103	* This is followed by output of the normal data for dgefa, dges1,
	104	* total, Mflops, unit and ratio with five sets of results for each.
	105	*
	106	************************************************************************
	107	*
	108	* Example from output file Linpack.txt
	109	*
	110	* LINPACK BENCHMARK FOR PCs 'C/C++' n @ 100
	111	*
	112	* Month run 9/1996
	113	* PC model Escom
	114	* CPU Pentium
	115	* Clock MHz 100
	116	* Cache 256K
	117	* Options Neptune chipset
	118	* OS/DOS Windows 95
	119	* Compiler Watcom C/C++ 10.5 Win 386
	120	* OptLevel -zp4 -otexan -fp5 -5r -dDP -dROLL
	121	* Run by Roy Longbottom
	122	* From UK
	123	* Mail 101323.2241@compuserve.com
	124	*
	125	* Rolling Rolled
	126	* Precision Double
	127	* norm. resid 0.4
	128	* resid 7.41628980e-014
	129	* machep 1.00000000e-015 (8.88178420e-016 NON OPT)
	130	* x[0]-1 -1.49880108e-014
	131	* x[n-1]-1 -1.89848137e-014
	132	* matgen 1 seconds 0.01051
	133	* matgen 2 seconds 0.01050
	134	* Repetitions 70
	135	* Leading dimension 201
	136	* dgefa dgesl total Mflops
	137	* 1 pass seconds 0.06000 0.00000 0.06000
	138	* Repeat seconds 0.06092 0.00157 0.06249 10.99
	139	* Repeat seconds 0.06077 0.00157 0.06234 11.01
	140	* Repeat seconds 0.06092 0.00157 0.06249 10.99
	141	* Repeat seconds 0.06092 0.00157 0.06249 10.99
	142	* Repeat seconds 0.06092 0.00157 0.06249 10.99
	143	* Average 10.99
	144	* Leading dimension 200
	145	* Repeat seconds 0.05936 0.00157 0.06093 11.27
	146	* Repeat seconds 0.05936 0.00157 0.06093 11.27
	147	* Repeat seconds 0.05864 0.00157 0.06021 11.40
	148	* Repeat seconds 0.05936 0.00157 0.06093 11.27
	149	* Repeat seconds 0.05864 0.00157 0.06021 11.40
	150	* Average 11.32
	151	*
	152	************************************************************************
	153	*
	154	* Examples of Results
	155	*
	156	* Precompiled codes were produced via a Watcom C/C++ 10.5 compiler.
	157	* Versions are available for DOS, Windows 3/95 and NT/Win 95. Both
	158	* non-optimised and optimised programs are available. The latter has
	159	* options as in the above example. Although these options can place
	160	* functions in-line, in this case, daxpy is not in-lined. Optimisation
	161	* reduces 18 instructions in the loop in this function to the following:
	162	*
	163	* L85 fld st(0)
	164	* fmul qword ptr [edx]
	165	* add eax,00000008H
	166	* add edx,00000008H
	167	* fadd qword ptr -8H[eax]
	168	* inc ebx
	169	* fstp qword ptr -8H[eax]
	170	* cmp ebx,esi
	171	* jl L85
	172	*
	173	* Results produced are not consistent between runs but produce similar
	174	* speeds when executing at a particular dimension (see above). An example
	175	* of other results is 11.4/10.5 Mflops. Most typical double precision
	176	* rolled results are:
	177	*
	178	* Opt No Opt Version/
	179	* MHz Cache Mflops Mflops Make/Options Via
	180	*
	181	* AM80386DX 40 128K 0.53 0.36 Clone Win/W95
	182	* 80486DX2 66 128K 2.5 1.9 Escom SIS chipset Win/W95
	183	* 80486DX2 66 128K 2.3 1.9 Escom SIS chipset NT/W95
	184	* 80486DX2 66 128K 2.8 2.0 Escom SIS chipset Dos/Dos
	185	* Pentium 100 256K 11 4.2 Escom Neptune chipset Win/W95
	186	* Pentium 100 256K 11 5.5 Escom Neptune chipset NT/W95
	187	* Pentium 100 256K 12 4.4 Escom Neptune chipset Dos/Dos
	188	* Pentium Pro 200 256K 48 19 Dell XPS Pro200n NT/NT
	189	*
	190	* The results are as produced when compiled as Linpack.cpp. Compiling as
	191	* Linpack.c gives similar speeds but the code is a little different.
	192	*
	193	***************************************************************************
	194	*/
	195
[0a16d5f]	196	#define DP
	197	#define ROLL
[8783660]	198
	199	#ifdef SP
	200	#define REAL float
	201	#define ZERO 0.0
	202	#define ONE 1.0
	203	#define PREC "Single "
	204	#endif
	205
	206	#ifdef DP
	207	#define REAL double
	208	#define ZERO 0.0e0
	209	#define ONE 1.0e0
	210	#define PREC "Double "
	211	#endif
	212
	213	#ifdef ROLL
	214	#define ROLLING "Rolled "
	215	#endif
	216	#ifdef UNROLL
	217	#define ROLLING "Unrolled "
	218	#endif
	219
	220
[0a16d5f]	221	#define NTIMES atoi(argv[1])
[8783660]	222
	223	#include <stdio.h>
	224	#include <math.h>
	225	#include <stdlib.h>
[8c1f4064]	226	#ifdef __rtems__
	227	#include <tmacros.h>
	228	#undef print_time
	229	#define fprintf(f, ...) rtems_printf(&rtems_test_printer, __VA_ARGS__)
	230	#endif /* __rtems__ */
[8783660]	231
	232
	233	static REAL atime[9][15];
	234
	235	void print_time (int row);
	236	void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma);
	237	void dgefa (REAL a[], int lda, int n, int ipvt[], int *info);
	238	void dgesl (REAL a[],int lda,int n,int ipvt[],REAL b[],int job);
	239	void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
	240	void daxpy (int n, REAL da, REAL dx[], int incx, REAL dy[], int incy);
	241	REAL epslon (REAL x);
	242	int idamax (int n, REAL dx[], int incx);
	243	void dscal (int n, REAL da, REAL dx[], int incx);
	244	REAL ddot (int n, REAL dx[], int incx, REAL dy[], int incy);
	245
	246	/* TIME TIME TIME TIME TIME TIME TIME TIME TIME TIME TIME TIME TIME */
[0a16d5f]	247	#include <sys/time.h> /* for following time functions only */
	248	static REAL second(void)
[8783660]	249	{
[0a16d5f]	250	struct timeval tv;
[8783660]	251
[0a16d5f]	252	gettimeofday(&tv, NULL);
	253	return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
[8783660]	254	}
	255
[0a16d5f]	256	int main (int argc, char **argv)
[8783660]	257	{
	258	static REAL aa[200200],a[200201],b[200],x[200];
	259	REAL cray,ops,total,norma,normx;
	260	REAL resid,residn,eps,t1,tm2,epsn,x1,x2;
	261	REAL mflops;
	262	static int ipvt[200],n,i,j,ntimes,info,lda,ldaa;
[0a16d5f]	263	int pass, loop;
[8783660]	264	REAL overhead1, overhead2, time1, time2;
[0a16d5f]	265	char compiler, options;
[8783660]	266
	267	/************************************************************************
	268	* Enter details of compiler and options used *
	269	************************************************************************/
	270	/----------------- --------- --------- ---------/
	271	compiler = "INSERT COMPILER NAME HERE";
	272	options = "INSERT OPTIMISATION OPTIONS HERE";
	273	/* Include -dDP or -dSP and -dROLL or -dUNROLL */
	274
	275	lda = 201;
	276	ldaa = 200;
	277	cray = .056;
	278	n = 100;
	279
	280	fprintf(stdout,ROLLING);fprintf(stdout,PREC);
	281	fprintf(stdout,"Precision Linpack Benchmark - PC Version in 'C/C++'\n\n");
	282	fprintf(stdout,"Compiler %s\n",compiler);
	283	fprintf(stdout,"Optimisation %s\n\n",options);
	284
	285	ops = (2.0e0(nnn))/3.0 + 2.0(n*n);
	286
	287	matgen(a,lda,n,b,&norma);
	288	t1 = second();
	289	dgefa(a,lda,n,ipvt,&info);
	290	atime[0][0] = second() - t1;
	291	t1 = second();
	292	dgesl(a,lda,n,ipvt,b,0);
	293	atime[1][0] = second() - t1;
	294	total = atime[0][0] + atime[1][0];
	295
	296	/* compute a residual to verify results. */
	297
	298	for (i = 0; i < n; i++) {
	299	x[i] = b[i];
	300	}
	301	matgen(a,lda,n,b,&norma);
	302	for (i = 0; i < n; i++) {
	303	b[i] = -b[i];
	304	}
	305	dmxpy(n,b,n,lda,x,a);
	306	resid = 0.0;
	307	normx = 0.0;
	308	for (i = 0; i < n; i++) {
	309	resid = (resid > fabs((double)b[i]))
	310	? resid : fabs((double)b[i]);
	311	normx = (normx > fabs((double)x[i]))
	312	? normx : fabs((double)x[i]);
	313	}
	314	eps = epslon(ONE);
	315	residn = resid/( nnormanormx*eps );
	316	epsn = eps;
	317	x1 = x[0] - 1;
	318	x2 = x[n-1] - 1;
	319
	320	printf("norm resid resid machep");
	321	printf(" x[0]-1 x[n-1]-1\n");
	322	printf("%6.1f %17.8e%17.8e%17.8e%17.8e\n\n",
	323	(double)residn, (double)resid, (double)epsn,
	324	(double)x1, (double)x2);
	325
[0a16d5f]	326	fprintf(stdout,"Times are reported for matrices of order %5d\n",n);
	327	fprintf(stdout,"1 pass times for array with leading dimension of%5d\n\n",lda);
	328	fprintf(stdout," dgefa dgesl total Mflops unit");
	329	fprintf(stdout," ratio\n");
[8783660]	330
	331	atime[2][0] = total;
	332	if (total > 0.0)
	333	{
	334	atime[3][0] = ops/(1.0e6*total);
	335	atime[4][0] = 2.0/atime[3][0];
	336	}
	337	else
	338	{
	339	atime[3][0] = 0.0;
	340	atime[4][0] = 0.0;
	341	}
	342	atime[5][0] = total/cray;
	343
	344	print_time(0);
	345
	346	/************************************************************************
	347	* Calculate overhead of executing matgen procedure *
	348	************************************************************************/
	349
[0a16d5f]	350	fprintf (stdout,"\nCalculating matgen overhead\n");
[8783660]	351	pass = -20;
	352	loop = NTIMES;
	353	do
	354	{
	355	time1 = second();
	356	pass = pass + 1;
	357	for ( i = 0 ; i < loop ; i++)
	358	{
	359	matgen(a,lda,n,b,&norma);
	360	}
	361	time2 = second();
	362	overhead1 = (time2 - time1);
[0a16d5f]	363	fprintf (stdout,"%10d times %6.2f seconds\n", loop, overhead1);
[8783660]	364	if (overhead1 > 5.0)
	365	{
	366	pass = 0;
	367	}
	368	if (pass < 0)
	369	{
	370	if (overhead1 < 0.1)
	371	{
	372	loop = loop * 10;
	373	}
	374	else
	375	{
	376	loop = loop * 2;
	377	}
	378	}
	379	}
	380	while (pass < 0);
	381
	382	overhead1 = overhead1 / (double)loop;
	383
[0a16d5f]	384	fprintf (stdout,"Overhead for 1 matgen %12.5f seconds\n\n", overhead1);
[8783660]	385
	386	/************************************************************************
	387	* Calculate matgen/dgefa passes for 5 seconds *
	388	************************************************************************/
	389
[0a16d5f]	390	fprintf (stdout,"Calculating matgen/dgefa passes for 5 seconds\n");
[8783660]	391	pass = -20;
	392	ntimes = NTIMES;
	393	do
	394	{
	395	time1 = second();
	396	pass = pass + 1;
	397	for ( i = 0 ; i < ntimes ; i++)
	398	{
	399	matgen(a,lda,n,b,&norma);
	400	dgefa(a,lda,n,ipvt,&info );
	401	}
	402	time2 = second() - time1;
[0a16d5f]	403	fprintf (stdout,"%10d times %6.2f seconds\n", ntimes, time2);
[8783660]	404	if (time2 > 5.0)
	405	{
	406	pass = 0;
	407	}
	408	if (pass < 0)
	409	{
	410	if (time2 < 0.1)
	411	{
	412	ntimes = ntimes * 10;
	413	}
	414	else
	415	{
	416	ntimes = ntimes * 2;
	417	}
	418	}
	419	}
	420	while (pass < 0);
	421
	422	ntimes = 5.0 * (double)ntimes / time2;
	423	if (ntimes == 0) ntimes = 1;
	424
[0a16d5f]	425	fprintf (stdout,"Passes used %10d \n\n", ntimes);
	426	fprintf(stdout,"Times for array with leading dimension of%4d\n\n",lda);
	427	fprintf(stdout," dgefa dgesl total Mflops unit");
	428	fprintf(stdout," ratio\n");
[8783660]	429
	430	/************************************************************************
	431	* Execute 5 passes *
	432	************************************************************************/
	433
	434	tm2 = ntimes * overhead1;
	435	atime[3][6] = 0;
	436
	437	for (j=1 ; j<6 ; j++)
	438	{
	439
	440	t1 = second();
	441
	442	for (i = 0; i < ntimes; i++)
	443	{
	444	matgen(a,lda,n,b,&norma);
	445	dgefa(a,lda,n,ipvt,&info );
	446	}
	447
	448	atime[0][j] = (second() - t1 - tm2)/ntimes;
	449
	450	t1 = second();
	451
	452	for (i = 0; i < ntimes; i++)
	453	{
	454	dgesl(a,lda,n,ipvt,b,0);
	455	}
	456
	457	atime[1][j] = (second() - t1)/ntimes;
	458	total = atime[0][j] + atime[1][j];
	459	atime[2][j] = total;
	460	atime[3][j] = ops/(1.0e6*total);
	461	atime[4][j] = 2.0/atime[3][j];
	462	atime[5][j] = total/cray;
	463	atime[3][6] = atime[3][6] + atime[3][j];
	464
	465	print_time(j);
	466	}
	467	atime[3][6] = atime[3][6] / 5.0;
[0a16d5f]	468	fprintf (stdout,"Average %11.2f\n",
[8783660]	469	(double)atime[3][6]);
	470
[0a16d5f]	471	fprintf (stdout,"\nCalculating matgen2 overhead\n");
[8783660]	472
	473	/************************************************************************
	474	* Calculate overhead of executing matgen procedure *
	475	************************************************************************/
	476
	477	time1 = second();
	478	for ( i = 0 ; i < loop ; i++)
	479	{
	480	matgen(aa,ldaa,n,b,&norma);
	481	}
	482	time2 = second();
	483	overhead2 = (time2 - time1);
	484	overhead2 = overhead2 / (double)loop;
	485
[0a16d5f]	486	fprintf (stdout,"Overhead for 1 matgen %12.5f seconds\n\n", overhead2);
	487	fprintf(stdout,"Times for array with leading dimension of%4d\n\n",ldaa);
	488	fprintf(stdout," dgefa dgesl total Mflops unit");
	489	fprintf(stdout," ratio\n");
[8783660]	490
	491	/************************************************************************
	492	* Execute 5 passes *
	493	************************************************************************/
	494
	495	tm2 = ntimes * overhead2;
	496	atime[3][12] = 0;
	497
	498	for (j=7 ; j<12 ; j++)
	499	{
	500
	501	t1 = second();
	502
	503	for (i = 0; i < ntimes; i++)
	504	{
	505	matgen(aa,ldaa,n,b,&norma);
	506	dgefa(aa,ldaa,n,ipvt,&info );
	507	}
	508
	509	atime[0][j] = (second() - t1 - tm2)/ntimes;
	510
	511	t1 = second();
	512
	513	for (i = 0; i < ntimes; i++)
	514	{
	515	dgesl(aa,ldaa,n,ipvt,b,0);
	516	}
	517
	518	atime[1][j] = (second() - t1)/ntimes;
	519	total = atime[0][j] + atime[1][j];
	520	atime[2][j] = total;
	521	atime[3][j] = ops/(1.0e6*total);
	522	atime[4][j] = 2.0/atime[3][j];
	523	atime[5][j] = total/cray;
	524	atime[3][12] = atime[3][12] + atime[3][j];
	525
	526	print_time(j);
	527	}
	528	atime[3][12] = atime[3][12] / 5.0;
[0a16d5f]	529	fprintf (stdout,"Average %11.2f\n",
[8783660]	530	(double)atime[3][12]);
	531
	532	/************************************************************************
	533	* Use minimum average as overall Mflops rating *
	534	************************************************************************/
	535
	536	mflops = atime[3][6];
	537	if (atime[3][12] < mflops) mflops = atime[3][12];
	538
[0a16d5f]	539	fprintf(stdout,"\n");
	540	fprintf(stdout,ROLLING);fprintf(stdout,PREC);
	541	fprintf(stdout," Precision %11.2f Mflops \n\n",mflops);
[e4633241]	542	#ifdef __rtems__
	543	return 0;
	544	#endif
[8783660]	545	}
	546
	547	/----------------------/
	548	void print_time (int row)
	549
	550	{
[0a16d5f]	551	fprintf(stdout,"%11.5f%11.5f%11.5f%11.2f%11.4f%11.4f\n", (double)atime[0][row],
[8783660]	552	(double)atime[1][row], (double)atime[2][row], (double)atime[3][row],
	553	(double)atime[4][row], (double)atime[5][row]);
	554	return;
	555	}
	556
	557	/----------------------/
	558
	559	void matgen (REAL a[], int lda, int n, REAL b[], REAL *norma)
	560
	561
	562	/* We would like to declare a[][lda], but c does not allow it. In this
	563	function, references to a[i][j] are written a[ldai+j]. /
	564
	565	{
	566	int init, i, j;
	567
	568	init = 1325;
	569	*norma = 0.0;
	570	for (j = 0; j < n; j++) {
	571	for (i = 0; i < n; i++) {
	572	init = 3125*init % 65536;
	573	a[lda*j+i] = (init - 32768.0)/16384.0;
	574	norma = (a[ldaj+i] > norma) ? a[ldaj+i] : *norma;
	575
	576	/* alternative for some compilers
	577	if (fabs(a[ldaj+i]) > norma) norma = fabs(a[ldaj+i]);
	578	*/
	579	}
	580	}
	581	for (i = 0; i < n; i++) {
	582	b[i] = 0.0;
	583	}
	584	for (j = 0; j < n; j++) {
	585	for (i = 0; i < n; i++) {
	586	b[i] = b[i] + a[lda*j+i];
	587	}
	588	}
	589	return;
	590	}
	591
	592	/----------------------/
	593	void dgefa(REAL a[], int lda, int n, int ipvt[], int *info)
	594
	595
	596	/* We would like to declare a[][lda], but c does not allow it. In this
	597	function, references to a[i][j] are written a[ldai+j]. /
	598	/*
	599	dgefa factors a double precision matrix by gaussian elimination.
	600
	601	dgefa is usually called by dgeco, but it can be called
	602	directly with a saving in time if rcond is not needed.
	603	(time for dgeco) = (1 + 9/n)*(time for dgefa) .
	604
	605	on entry
	606
	607	a REAL precision[n][lda]
	608	the matrix to be factored.
	609
	610	lda integer
	611	the leading dimension of the array a .
	612
	613	n integer
	614	the order of the matrix a .
	615
	616	on return
	617
	618	a an upper triangular matrix and the multipliers
	619	which were used to obtain it.
	620	the factorization can be written a = l*u where
	621	l is a product of permutation and unit lower
	622	triangular matrices and u is upper triangular.
	623
	624	ipvt integer[n]
	625	an integer vector of pivot indices.
	626
	627	info integer
	628	= 0 normal value.
	629	= k if u[k][k] .eq. 0.0 . this is not an error
	630	condition for this subroutine, but it does
	631	indicate that dgesl or dgedi will divide by zero
	632	if called. use rcond in dgeco for a reliable
	633	indication of singularity.
	634
	635	linpack. this version dated 08/14/78 .
	636	cleve moler, university of new mexico, argonne national lab.
	637
	638	functions
	639
	640	blas daxpy,dscal,idamax
	641	*/
	642
	643	{
	644	/* internal variables */
	645
	646	REAL t;
	647	int j,k,kp1,l,nm1;
	648
	649
	650	/* gaussian elimination with partial pivoting */
	651
	652	*info = 0;
	653	nm1 = n - 1;
	654	if (nm1 >= 0) {
	655	for (k = 0; k < nm1; k++) {
	656	kp1 = k + 1;
	657
	658	/* find l = pivot index */
	659
	660	l = idamax(n-k,&a[lda*k+k],1) + k;
	661	ipvt[k] = l;
	662
	663	/* zero pivot implies this column already
	664	triangularized */
	665
	666	if (a[lda*k+l] != ZERO) {
	667
	668	/* interchange if necessary */
	669
	670	if (l != k) {
	671	t = a[lda*k+l];
	672	a[ldak+l] = a[ldak+k];
	673	a[lda*k+k] = t;
	674	}
	675
	676	/* compute multipliers */
	677
	678	t = -ONE/a[lda*k+k];
	679	dscal(n-(k+1),t,&a[lda*k+k+1],1);
	680
	681	/* row elimination with column indexing */
	682
	683	for (j = kp1; j < n; j++) {
	684	t = a[lda*j+l];
	685	if (l != k) {
	686	a[ldaj+l] = a[ldaj+k];
	687	a[lda*j+k] = t;
	688	}
	689	daxpy(n-(k+1),t,&a[lda*k+k+1],1,
	690	&a[lda*j+k+1],1);
	691	}
	692	}
	693	else {
	694	*info = k;
	695	}
	696	}
	697	}
	698	ipvt[n-1] = n-1;
	699	if (a[lda(n-1)+(n-1)] == ZERO) info = n-1;
	700	return;
	701	}
	702
	703	/----------------------/
	704
	705	void dgesl(REAL a[],int lda,int n,int ipvt[],REAL b[],int job )
	706
	707
	708	/* We would like to declare a[][lda], but c does not allow it. In this
	709	function, references to a[i][j] are written a[ldai+j]. /
	710
	711	/*
	712	dgesl solves the double precision system
	713	a * x = b or trans(a) * x = b
	714	using the factors computed by dgeco or dgefa.
	715
	716	on entry
	717
	718	a double precision[n][lda]
	719	the output from dgeco or dgefa.
	720
	721	lda integer
	722	the leading dimension of the array a .
	723
	724	n integer
	725	the order of the matrix a .
	726
	727	ipvt integer[n]
	728	the pivot vector from dgeco or dgefa.
	729
	730	b double precision[n]
	731	the right hand side vector.
	732
	733	job integer
	734	= 0 to solve a*x = b ,
	735	= nonzero to solve trans(a)*x = b where
	736	trans(a) is the transpose.
	737
	738	on return
	739
	740	b the solution vector x .
	741
	742	error condition
	743
	744	a division by zero will occur if the input factor contains a
	745	zero on the diagonal. technically this indicates singularity
	746	but it is often caused by improper arguments or improper
	747	setting of lda . it will not occur if the subroutines are
	748	called correctly and if dgeco has set rcond .gt. 0.0
	749	or dgefa has set info .eq. 0 .
	750
	751	to compute inverse(a) * c where c is a matrix
	752	with p columns
	753	dgeco(a,lda,n,ipvt,rcond,z)
	754	if (!rcond is too small){
	755	for (j=0,j<p,j++)
	756	dgesl(a,lda,n,ipvt,c[j][0],0);
	757	}
	758
	759	linpack. this version dated 08/14/78 .
	760	cleve moler, university of new mexico, argonne national lab.
	761
	762	functions
	763
	764	blas daxpy,ddot
	765	*/
	766	{
	767	/* internal variables */
	768
	769	REAL t;
	770	int k,kb,l,nm1;
	771
	772	nm1 = n - 1;
	773	if (job == 0) {
	774
	775	/* job = 0 , solve a * x = b
	776	first solve ly = b /
	777
	778	if (nm1 >= 1) {
	779	for (k = 0; k < nm1; k++) {
	780	l = ipvt[k];
	781	t = b[l];
	782	if (l != k){
	783	b[l] = b[k];
	784	b[k] = t;
	785	}
	786	daxpy(n-(k+1),t,&a[lda*k+k+1],1,&b[k+1],1 );
	787	}
	788	}
	789
	790	/* now solve ux = y /
	791
	792	for (kb = 0; kb < n; kb++) {
	793	k = n - (kb + 1);
	794	b[k] = b[k]/a[lda*k+k];
	795	t = -b[k];
	796	daxpy(k,t,&a[lda*k+0],1,&b[0],1 );
	797	}
	798	}
	799	else {
	800
	801	/* job = nonzero, solve trans(a) * x = b
	802	first solve trans(u)y = b /
	803
	804	for (k = 0; k < n; k++) {
	805	t = ddot(k,&a[lda*k+0],1,&b[0],1);
	806	b[k] = (b[k] - t)/a[lda*k+k];
	807	}
	808
	809	/* now solve trans(l)x = y /
	810
	811	if (nm1 >= 1) {
	812	for (kb = 1; kb < nm1; kb++) {
	813	k = n - (kb+1);
	814	b[k] = b[k] + ddot(n-(k+1),&a[lda*k+k+1],1,&b[k+1],1);
	815	l = ipvt[k];
	816	if (l != k) {
	817	t = b[l];
	818	b[l] = b[k];
	819	b[k] = t;
	820	}
	821	}
	822	}
	823	}
	824	return;
	825	}
	826
	827	/----------------------/
	828
	829	void daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy)
	830	/*
	831	constant times a vector plus a vector.
	832	jack dongarra, linpack, 3/11/78.
	833	*/
	834
	835	{
[0a16d5f]	836	int i,ix,iy;
[8783660]	837
	838	if(n <= 0) return;
	839	if (da == ZERO) return;
	840
	841	if(incx != 1 \|\| incy != 1) {
	842
	843	/* code for unequal increments or equal increments
	844	not equal to 1 */
	845
	846	ix = 0;
	847	iy = 0;
	848	if(incx < 0) ix = (-n+1)*incx;
	849	if(incy < 0)iy = (-n+1)*incy;
	850	for (i = 0;i < n; i++) {
	851	dy[iy] = dy[iy] + da*dx[ix];
	852	ix = ix + incx;
	853	iy = iy + incy;
	854
	855	}
	856	return;
	857	}
	858
	859	/* code for both increments equal to 1 */
	860
	861
	862	#ifdef ROLL
	863
	864	for (i = 0;i < n; i++) {
	865	dy[i] = dy[i] + da*dx[i];
	866	}
	867
	868
	869	#endif
	870
	871	#ifdef UNROLL
	872
	873	m = n % 4;
	874	if ( m != 0) {
	875	for (i = 0; i < m; i++)
	876	dy[i] = dy[i] + da*dx[i];
	877
	878	if (n < 4) return;
	879	}
	880	for (i = m; i < n; i = i + 4) {
	881	dy[i] = dy[i] + da*dx[i];
	882	dy[i+1] = dy[i+1] + da*dx[i+1];
	883	dy[i+2] = dy[i+2] + da*dx[i+2];
	884	dy[i+3] = dy[i+3] + da*dx[i+3];
	885
	886	}
	887
	888	#endif
	889	return;
	890	}
	891
	892	/----------------------/
	893
	894	REAL ddot(int n, REAL dx[], int incx, REAL dy[], int incy)
	895	/*
	896	forms the dot product of two vectors.
	897	jack dongarra, linpack, 3/11/78.
	898	*/
	899
	900	{
	901	REAL dtemp;
[0a16d5f]	902	int i,ix,iy;
[8783660]	903
	904	dtemp = ZERO;
	905
	906	if(n <= 0) return(ZERO);
	907
	908	if(incx != 1 \|\| incy != 1) {
	909
	910	/* code for unequal increments or equal increments
	911	not equal to 1 */
	912
	913	ix = 0;
	914	iy = 0;
	915	if (incx < 0) ix = (-n+1)*incx;
	916	if (incy < 0) iy = (-n+1)*incy;
	917	for (i = 0;i < n; i++) {
	918	dtemp = dtemp + dx[ix]*dy[iy];
	919	ix = ix + incx;
	920	iy = iy + incy;
	921
	922	}
	923	return(dtemp);
	924	}
	925
	926	/* code for both increments equal to 1 */
	927
	928
	929	#ifdef ROLL
	930
	931	for (i=0;i < n; i++)
	932	dtemp = dtemp + dx[i]*dy[i];
	933
	934	return(dtemp);
	935
	936	#endif
	937
	938	#ifdef UNROLL
	939
	940
	941	m = n % 5;
	942	if (m != 0) {
	943	for (i = 0; i < m; i++)
	944	dtemp = dtemp + dx[i]*dy[i];
	945	if (n < 5) return(dtemp);
	946	}
	947	for (i = m; i < n; i = i + 5) {
	948	dtemp = dtemp + dx[i]*dy[i] +
	949	dx[i+1]dy[i+1] + dx[i+2]dy[i+2] +
	950	dx[i+3]dy[i+3] + dx[i+4]dy[i+4];
	951	}
	952	return(dtemp);
	953
	954	#endif
	955
	956	}
	957
	958	/----------------------/
	959	void dscal(int n, REAL da, REAL dx[], int incx)
	960
	961	/* scales a vector by a constant.
	962	jack dongarra, linpack, 3/11/78.
	963	*/
	964
	965	{
[0a16d5f]	966	int i,nincx;
[8783660]	967
	968	if(n <= 0)return;
	969	if(incx != 1) {
	970
	971	/* code for increment not equal to 1 */
	972
	973	nincx = n*incx;
	974	for (i = 0; i < nincx; i = i + incx)
	975	dx[i] = da*dx[i];
	976
	977	return;
	978	}
	979
	980	/* code for increment equal to 1 */
	981
	982
	983	#ifdef ROLL
	984
	985	for (i = 0; i < n; i++)
	986	dx[i] = da*dx[i];
	987
	988
	989	#endif
	990
	991	#ifdef UNROLL
	992
	993
	994	m = n % 5;
	995	if (m != 0) {
	996	for (i = 0; i < m; i++)
	997	dx[i] = da*dx[i];
	998	if (n < 5) return;
	999	}
	1000	for (i = m; i < n; i = i + 5){
	1001	dx[i] = da*dx[i];
	1002	dx[i+1] = da*dx[i+1];
	1003	dx[i+2] = da*dx[i+2];
	1004	dx[i+3] = da*dx[i+3];
	1005	dx[i+4] = da*dx[i+4];
	1006	}
	1007
	1008	#endif
	1009
	1010	}
	1011
	1012	/----------------------/
	1013	int idamax(int n, REAL dx[], int incx)
	1014
	1015	/*
	1016	finds the index of element having max. absolute value.
	1017	jack dongarra, linpack, 3/11/78.
	1018	*/
	1019
	1020
	1021	{
	1022	REAL dmax;
	1023	int i, ix, itemp;
	1024
	1025	if( n < 1 ) return(-1);
	1026	if(n ==1 ) return(0);
[0a16d5f]	1027	itemp = -1;
[8783660]	1028	if(incx != 1) {
	1029
	1030	/* code for increment not equal to 1 */
	1031
	1032	ix = 1;
	1033	dmax = fabs((double)dx[0]);
	1034	ix = ix + incx;
	1035	for (i = 1; i < n; i++) {
	1036	if(fabs((double)dx[ix]) > dmax) {
	1037	itemp = i;
	1038	dmax = fabs((double)dx[ix]);
	1039	}
	1040	ix = ix + incx;
	1041	}
	1042	}
	1043	else {
	1044
	1045	/* code for increment equal to 1 */
	1046
	1047	itemp = 0;
	1048	dmax = fabs((double)dx[0]);
	1049	for (i = 1; i < n; i++) {
	1050	if(fabs((double)dx[i]) > dmax) {
	1051	itemp = i;
	1052	dmax = fabs((double)dx[i]);
	1053	}
	1054	}
	1055	}
	1056	return (itemp);
	1057	}
	1058
	1059	/----------------------/
	1060	REAL epslon (REAL x)
	1061
	1062	/*
	1063	estimate unit roundoff in quantities of size x.
	1064	*/
	1065
	1066	{
	1067	REAL a,b,c,eps;
	1068	/*
	1069	this program should function properly on all systems
	1070	satisfying the following two assumptions,
	1071	1. the base used in representing dfloating point
	1072	numbers is not a power of three.
	1073	2. the quantity a in statement 10 is represented to
	1074	the accuracy used in dfloating point variables
	1075	that are stored in memory.
	1076	the statement number 10 and the go to 10 are intended to
	1077	force optimizing compilers to generate code satisfying
	1078	assumption 2.
	1079	under these assumptions, it should be true that,
	1080	a is not exactly equal to four-thirds,
	1081	b has a zero for its last bit or digit,
	1082	c is not exactly equal to one,
	1083	eps measures the separation of 1.0 from
	1084	the next larger dfloating point number.
	1085	the developers of eispack would appreciate being informed
	1086	about any systems where these assumptions do not hold.
	1087
	1088	*****************************************************************
	1089	this routine is one of the auxiliary routines used by eispack iii
	1090	to avoid machine dependencies.
	1091	*****************************************************************
	1092
	1093	this version dated 4/6/83.
	1094	*/
	1095
	1096	a = 4.0e0/3.0e0;
	1097	eps = ZERO;
	1098	while (eps == ZERO) {
	1099	b = a - ONE;
	1100	c = b + b + b;
	1101	eps = fabs((double)(c-ONE));
	1102	}
	1103	return(eps*fabs((double)x));
	1104	}
	1105
	1106	/----------------------/
	1107	void dmxpy (int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
	1108
	1109
	1110	/* We would like to declare m[][ldm], but c does not allow it. In this
	1111	function, references to m[i][j] are written m[ldmi+j]. /
	1112
	1113	/*
	1114	purpose:
	1115	multiply matrix m times vector x and add the result to vector y.
	1116
	1117	parameters:
	1118
	1119	n1 integer, number of elements in vector y, and number of rows in
	1120	matrix m
	1121
	1122	y double [n1], vector of length n1 to which is added
	1123	the product m*x
	1124
	1125	n2 integer, number of elements in vector x, and number of columns
	1126	in matrix m
	1127
	1128	ldm integer, leading dimension of array m
	1129
	1130	x double [n2], vector of length n2
	1131
	1132	m double [ldm][n2], matrix of n1 rows and n2 columns
	1133
	1134	----------------------------------------------------------------------
	1135	*/
	1136	{
	1137	int j,i,jmin;
	1138	/* cleanup odd vector */
	1139
	1140	j = n2 % 2;
	1141	if (j >= 1) {
	1142	j = j - 1;
	1143	for (i = 0; i < n1; i++)
	1144	y[i] = (y[i]) + x[j]m[ldmj+i];
	1145	}
	1146
	1147	/* cleanup odd group of two vectors */
	1148
	1149	j = n2 % 4;
	1150	if (j >= 2) {
	1151	j = j - 1;
	1152	for (i = 0; i < n1; i++)
	1153	y[i] = ( (y[i])
	1154	+ x[j-1]m[ldm(j-1)+i]) + x[j]m[ldmj+i];
	1155	}
	1156
	1157	/* cleanup odd group of four vectors */
	1158
	1159	j = n2 % 8;
	1160	if (j >= 4) {
	1161	j = j - 1;
	1162	for (i = 0; i < n1; i++)
	1163	y[i] = ((( (y[i])
	1164	+ x[j-3]m[ldm(j-3)+i])
	1165	+ x[j-2]m[ldm(j-2)+i])
	1166	+ x[j-1]m[ldm(j-1)+i]) + x[j]m[ldmj+i];
	1167	}
	1168
	1169	/* cleanup odd group of eight vectors */
	1170
	1171	j = n2 % 16;
	1172	if (j >= 8) {
	1173	j = j - 1;
	1174	for (i = 0; i < n1; i++)
	1175	y[i] = ((((((( (y[i])
	1176	+ x[j-7]m[ldm(j-7)+i]) + x[j-6]m[ldm(j-6)+i])
	1177	+ x[j-5]m[ldm(j-5)+i]) + x[j-4]m[ldm(j-4)+i])
	1178	+ x[j-3]m[ldm(j-3)+i]) + x[j-2]m[ldm(j-2)+i])
	1179	+ x[j-1]m[ldm(j-1)+i]) + x[j] m[ldmj+i];
	1180	}
	1181
	1182	/* main loop - groups of sixteen vectors */
	1183
	1184	jmin = (n2%16)+16;
	1185	for (j = jmin-1; j < n2; j = j + 16) {
	1186	for (i = 0; i < n1; i++)
	1187	y[i] = ((((((((((((((( (y[i])
	1188	+ x[j-15]m[ldm(j-15)+i])
	1189	+ x[j-14]m[ldm(j-14)+i])
	1190	+ x[j-13]m[ldm(j-13)+i])
	1191	+ x[j-12]m[ldm(j-12)+i])
	1192	+ x[j-11]m[ldm(j-11)+i])
	1193	+ x[j-10]m[ldm(j-10)+i])
	1194	+ x[j- 9]m[ldm(j- 9)+i])
	1195	+ x[j- 8]m[ldm(j- 8)+i])
	1196	+ x[j- 7]m[ldm(j- 7)+i])
	1197	+ x[j- 6]m[ldm(j- 6)+i])
	1198	+ x[j- 5]m[ldm(j- 5)+i])
	1199	+ x[j- 4]m[ldm(j- 4)+i])
	1200	+ x[j- 3]m[ldm(j- 3)+i])
	1201	+ x[j- 2]m[ldm(j- 2)+i])
	1202	+ x[j- 1]m[ldm(j- 1)+i])
	1203	+ x[j] m[ldmj+i];
	1204	}
	1205	return;
	1206	}
	1207
	1208	/----------------------/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: