| [5f600f6] | 1 | # include <stdlib.h>
|
|---|
| 2 | # include <stdio.h>
|
|---|
| 3 | # include <math.h>
|
|---|
| 4 | # include <omp.h>
|
|---|
| 5 | # include <time.h>
|
|---|
| 6 |
|
|---|
| 7 | int main ( void );
|
|---|
| 8 | void test01 ( int n );
|
|---|
| 9 | void test02 ( int n );
|
|---|
| 10 | void test03 ( int n );
|
|---|
| 11 |
|
|---|
| 12 | int isamax ( int n, float x[], int incx );
|
|---|
| 13 | void matgen ( int lda, int n, float a[], float x[], float b[] );
|
|---|
| 14 | void msaxpy ( int nr, int nc, float a[], int n, float x[], float y[] );
|
|---|
| 15 | void msaxpy2 ( int nr, int nc, float a[], int n, float x[], float y[] );
|
|---|
| 16 | int msgefa ( float a[], int lda, int n, int ipvt[] );
|
|---|
| 17 | int msgefa2 ( float a[], int lda, int n, int ipvt[] );
|
|---|
| 18 | void saxpy ( int n, float a, float x[], int incx, float y[], int incy );
|
|---|
| 19 | float sdot ( int n, float x[], int incx, float y[], int incy );
|
|---|
| 20 | int sgefa ( float a[], int lda, int n, int ipvt[] );
|
|---|
| 21 | void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job );
|
|---|
| 22 | void sscal ( int n, float a, float x[], int incx );
|
|---|
| 23 | void sswap ( int n, float x[], int incx, float y[], int incy );
|
|---|
| 24 | void timestamp ( void );
|
|---|
| 25 |
|
|---|
| 26 | /******************************************************************************/
|
|---|
| 27 |
|
|---|
| 28 | int main ( void )
|
|---|
| 29 |
|
|---|
| 30 | /******************************************************************************/
|
|---|
| 31 | /*
|
|---|
| 32 | Purpose:
|
|---|
| 33 |
|
|---|
| 34 | MAIN is the main program for the SGEFA_OPENMP test program.
|
|---|
| 35 |
|
|---|
| 36 | Discussion:
|
|---|
| 37 |
|
|---|
| 38 | We want to compare methods of solving the linear system A*x=b.
|
|---|
| 39 |
|
|---|
| 40 | The first way uses the standard sequential algorithm "SGEFA".
|
|---|
| 41 |
|
|---|
| 42 | The second way uses a variant of SGEFA that has been modified to
|
|---|
| 43 | take advantage of OpenMP.
|
|---|
| 44 |
|
|---|
| 45 | The third way reruns the variant code, but with OpenMP turned off.
|
|---|
| 46 |
|
|---|
| 47 | Modified:
|
|---|
| 48 |
|
|---|
| 49 | 17 April 2009
|
|---|
| 50 |
|
|---|
| 51 | Author:
|
|---|
| 52 |
|
|---|
| 53 | John Burkardt
|
|---|
| 54 | */
|
|---|
| 55 | {
|
|---|
| 56 | int n;
|
|---|
| 57 |
|
|---|
| 58 | timestamp ( );
|
|---|
| 59 |
|
|---|
| 60 | printf ( "\n" );
|
|---|
| 61 | printf ( "SGEFA_OPENMP\n" );
|
|---|
| 62 | printf ( " C + OpenMP version\n" );
|
|---|
| 63 |
|
|---|
| 64 | printf ( "\n" );
|
|---|
| 65 | printf ( " Number of processors available = %d\n", omp_get_num_procs ( ) );
|
|---|
| 66 | printf ( " Number of threads = %d\n", omp_get_max_threads ( ) );
|
|---|
| 67 |
|
|---|
| 68 | printf ( "\n" );
|
|---|
| 69 | printf ( " Algorithm Mode N Error Time\n" );
|
|---|
| 70 |
|
|---|
| 71 | printf ( "\n" );
|
|---|
| 72 | n = 10;
|
|---|
| 73 | test01 ( n );
|
|---|
| 74 | test02 ( n );
|
|---|
| 75 | test03 ( n );
|
|---|
| [e479e1e] | 76 | /*
|
|---|
| [5f600f6] | 77 | printf ( "\n" );
|
|---|
| 78 | n = 100;
|
|---|
| 79 | test01 ( n );
|
|---|
| 80 | test02 ( n );
|
|---|
| 81 | test03 ( n );
|
|---|
| 82 |
|
|---|
| 83 | printf ( "\n" );
|
|---|
| 84 | n = 1000;
|
|---|
| 85 | test01 ( n );
|
|---|
| 86 | test02 ( n );
|
|---|
| 87 | test03 ( n );
|
|---|
| [e479e1e] | 88 | */
|
|---|
| [5f600f6] | 89 | printf ( "\n" );
|
|---|
| 90 | printf ( "SGEFA_OPENMP\n" );
|
|---|
| 91 | printf ( " Normal end of execution.\n" );
|
|---|
| 92 |
|
|---|
| 93 | printf ( "\n" );
|
|---|
| 94 | timestamp ( );
|
|---|
| 95 |
|
|---|
| 96 | return 0;
|
|---|
| 97 | }
|
|---|
| 98 | /******************************************************************************/
|
|---|
| 99 |
|
|---|
| 100 | void test01 ( int n )
|
|---|
| 101 |
|
|---|
| 102 | /******************************************************************************/
|
|---|
| 103 | /*
|
|---|
| 104 | Purpose:
|
|---|
| 105 |
|
|---|
| 106 | TEST01 runs the sequential version of SGEFA.
|
|---|
| 107 |
|
|---|
| 108 | Modified:
|
|---|
| 109 |
|
|---|
| 110 | 07 April 2008
|
|---|
| 111 |
|
|---|
| 112 | Author:
|
|---|
| 113 |
|
|---|
| 114 | John Burkardt
|
|---|
| 115 | */
|
|---|
| 116 | {
|
|---|
| 117 | float *a;
|
|---|
| 118 | float *b;
|
|---|
| 119 | float err;
|
|---|
| 120 | int i;
|
|---|
| 121 | int info;
|
|---|
| 122 | int *ipvt;
|
|---|
| 123 | int job;
|
|---|
| 124 | int lda;
|
|---|
| 125 | double wtime;
|
|---|
| 126 | float *x;
|
|---|
| 127 | /*
|
|---|
| 128 | Generate the linear system A * x = b.
|
|---|
| 129 | */
|
|---|
| 130 | lda = n;
|
|---|
| 131 | a = ( float * ) malloc ( lda * n * sizeof ( float ) );
|
|---|
| 132 | b = ( float * ) malloc ( n * sizeof ( float ) );
|
|---|
| 133 | x = ( float * ) malloc ( n * sizeof ( float ) );
|
|---|
| 134 |
|
|---|
| 135 | matgen ( lda, n, a, x, b );
|
|---|
| 136 | /*
|
|---|
| 137 | Factor the linear system.
|
|---|
| 138 | */
|
|---|
| 139 | ipvt = ( int * ) malloc ( n * sizeof ( int ) );
|
|---|
| 140 |
|
|---|
| 141 | wtime = omp_get_wtime ( );
|
|---|
| 142 | info = sgefa ( a, lda, n, ipvt );
|
|---|
| 143 | wtime = omp_get_wtime ( ) - wtime;
|
|---|
| 144 |
|
|---|
| 145 | if ( info != 0 )
|
|---|
| 146 | {
|
|---|
| 147 | printf ( "\n" );
|
|---|
| 148 | printf ( "TEST01 - Fatal error!\n" );
|
|---|
| 149 | printf ( " SGEFA reports the matrix is singular.\n" );
|
|---|
| 150 | exit ( 1 );
|
|---|
| 151 | }
|
|---|
| 152 | /*
|
|---|
| 153 | Solve the linear system.
|
|---|
| 154 | */
|
|---|
| 155 | job = 0;
|
|---|
| 156 | sgesl ( a, lda, n, ipvt, b, job );
|
|---|
| 157 |
|
|---|
| 158 | err = 0.0;
|
|---|
| 159 | for ( i = 0; i < n; i++ )
|
|---|
| 160 | {
|
|---|
| 161 | err = err + fabs ( x[i] - b[i] );
|
|---|
| 162 | }
|
|---|
| 163 | printf ( " Original Sequential %8d %10.4e %10.4e\n", n, err, wtime );
|
|---|
| 164 |
|
|---|
| 165 | free ( a );
|
|---|
| 166 | free ( b );
|
|---|
| 167 | free ( ipvt );
|
|---|
| 168 | free ( x );
|
|---|
| 169 |
|
|---|
| 170 | return;
|
|---|
| 171 | }
|
|---|
| 172 | /******************************************************************************/
|
|---|
| 173 |
|
|---|
| 174 | void test02 ( int n )
|
|---|
| 175 |
|
|---|
| 176 | /******************************************************************************/
|
|---|
| 177 | /*
|
|---|
| 178 | Purpose:
|
|---|
| 179 |
|
|---|
| 180 | TEST02 runs the revised version of SGEFA in parallel.
|
|---|
| 181 |
|
|---|
| 182 | Modified:
|
|---|
| 183 |
|
|---|
| 184 | 07 April 2008
|
|---|
| 185 |
|
|---|
| 186 | Author:
|
|---|
| 187 |
|
|---|
| 188 | John Burkardt
|
|---|
| 189 | */
|
|---|
| 190 | {
|
|---|
| 191 | float *a;
|
|---|
| 192 | float *b;
|
|---|
| 193 | float err;
|
|---|
| 194 | int i;
|
|---|
| 195 | int info;
|
|---|
| 196 | int *ipvt;
|
|---|
| 197 | int job;
|
|---|
| 198 | int lda;
|
|---|
| 199 | double wtime;
|
|---|
| 200 | float *x;
|
|---|
| 201 | /*
|
|---|
| 202 | Generate the linear system A * x = b.
|
|---|
| 203 | */
|
|---|
| 204 | lda = n;
|
|---|
| 205 | a = ( float * ) malloc ( lda * n * sizeof ( float ) );
|
|---|
| 206 | b = ( float * ) malloc ( n * sizeof ( float ) );
|
|---|
| 207 | x = ( float * ) malloc ( n * sizeof ( float ) );
|
|---|
| 208 |
|
|---|
| 209 | matgen ( lda, n, a, x, b );
|
|---|
| 210 | /*
|
|---|
| 211 | Factor the linear system.
|
|---|
| 212 | */
|
|---|
| 213 | ipvt = ( int * ) malloc ( n * sizeof ( int ) );
|
|---|
| 214 |
|
|---|
| 215 | wtime = omp_get_wtime ( );
|
|---|
| 216 | info = msgefa ( a, lda, n, ipvt );
|
|---|
| 217 | wtime = omp_get_wtime ( ) - wtime;
|
|---|
| 218 |
|
|---|
| 219 | if ( info != 0 )
|
|---|
| 220 | {
|
|---|
| 221 | printf ( "\n" );
|
|---|
| 222 | printf ( "TEST02 - Fatal error!\n" );
|
|---|
| 223 | printf ( " MSGEFA reports the matrix is singular.\n" );
|
|---|
| 224 | exit ( 1 );
|
|---|
| 225 | }
|
|---|
| 226 | /*
|
|---|
| 227 | Solve the linear system.
|
|---|
| 228 | */
|
|---|
| 229 | job = 0;
|
|---|
| 230 | sgesl ( a, lda, n, ipvt, b, job );
|
|---|
| 231 |
|
|---|
| 232 | err = 0.0;
|
|---|
| 233 | for ( i = 0; i < n; i++ )
|
|---|
| 234 | {
|
|---|
| 235 | err = err + fabs ( x[i] - b[i] );
|
|---|
| 236 | }
|
|---|
| 237 |
|
|---|
| 238 | printf ( " Revised Parallel %8d %10.4e %10.4e\n", n, err, wtime );
|
|---|
| 239 |
|
|---|
| 240 | free ( a );
|
|---|
| 241 | free ( b );
|
|---|
| 242 | free ( ipvt );
|
|---|
| 243 | free ( x );
|
|---|
| 244 |
|
|---|
| 245 | return;
|
|---|
| 246 | }
|
|---|
| 247 | /******************************************************************************/
|
|---|
| 248 |
|
|---|
| 249 | void test03 ( int n )
|
|---|
| 250 |
|
|---|
| 251 | /******************************************************************************/
|
|---|
| 252 | /*
|
|---|
| 253 | Purpose:
|
|---|
| 254 |
|
|---|
| 255 | TEST03 runs the revised version of SGEFA in sequential mode.
|
|---|
| 256 |
|
|---|
| 257 | Modified:
|
|---|
| 258 |
|
|---|
| 259 | 07 April 2008
|
|---|
| 260 |
|
|---|
| 261 | Author:
|
|---|
| 262 |
|
|---|
| 263 | John Burkardt
|
|---|
| 264 | */
|
|---|
| 265 | {
|
|---|
| 266 | float *a;
|
|---|
| 267 | float *b;
|
|---|
| 268 | float err;
|
|---|
| 269 | int i;
|
|---|
| 270 | int info;
|
|---|
| 271 | int *ipvt;
|
|---|
| 272 | int job;
|
|---|
| 273 | int lda;
|
|---|
| 274 | double wtime;
|
|---|
| 275 | float *x;
|
|---|
| 276 | /*
|
|---|
| 277 | Generate the linear system A * x = b.
|
|---|
| 278 | */
|
|---|
| 279 | lda = n;
|
|---|
| 280 | a = ( float * ) malloc ( lda * n * sizeof ( float ) );
|
|---|
| 281 | b = ( float * ) malloc ( n * sizeof ( float ) );
|
|---|
| 282 | x = ( float * ) malloc ( n * sizeof ( float ) );
|
|---|
| 283 |
|
|---|
| 284 | matgen ( lda, n, a, x, b );
|
|---|
| 285 | /*
|
|---|
| 286 | Factor the linear system.
|
|---|
| 287 | */
|
|---|
| 288 | ipvt = ( int * ) malloc ( n * sizeof ( int ) );
|
|---|
| 289 |
|
|---|
| 290 | wtime = omp_get_wtime ( );
|
|---|
| 291 | info = msgefa2 ( a, lda, n, ipvt );
|
|---|
| 292 | wtime = omp_get_wtime ( ) - wtime;
|
|---|
| 293 |
|
|---|
| 294 | if ( info != 0 )
|
|---|
| 295 | {
|
|---|
| 296 | printf ( "\n" );
|
|---|
| 297 | printf ( "TEST03 - Fatal error!\n" );
|
|---|
| 298 | printf ( " MSGEFA2 reports the matrix is singular.\n" );
|
|---|
| 299 | exit ( 1 );
|
|---|
| 300 | }
|
|---|
| 301 | /*
|
|---|
| 302 | Solve the linear system.
|
|---|
| 303 | */
|
|---|
| 304 | job = 0;
|
|---|
| 305 | sgesl ( a, lda, n, ipvt, b, job );
|
|---|
| 306 |
|
|---|
| 307 | err = 0.0;
|
|---|
| 308 | for ( i = 0; i < n; i++ )
|
|---|
| 309 | {
|
|---|
| 310 | err = err + fabs ( x[i] - b[i] );
|
|---|
| 311 | }
|
|---|
| 312 |
|
|---|
| 313 | printf ( " Revised Sequential %8d %10.4e %10.4e\n", n, err, wtime );
|
|---|
| 314 |
|
|---|
| 315 | free ( a );
|
|---|
| 316 | free ( b );
|
|---|
| 317 | free ( ipvt );
|
|---|
| 318 | free ( x );
|
|---|
| 319 |
|
|---|
| 320 | return;
|
|---|
| 321 | }
|
|---|
| 322 | /******************************************************************************/
|
|---|
| 323 |
|
|---|
| 324 | int isamax ( int n, float x[], int incx )
|
|---|
| 325 |
|
|---|
| 326 | /******************************************************************************/
|
|---|
| 327 | /*
|
|---|
| 328 | Purpose:
|
|---|
| 329 |
|
|---|
| 330 | ISAMAX finds the index of the vector element of maximum absolute value.
|
|---|
| 331 |
|
|---|
| 332 | Discussion:
|
|---|
| 333 |
|
|---|
| 334 | WARNING: This index is a 1-based index, not a 0-based index!
|
|---|
| 335 |
|
|---|
| 336 | Modified:
|
|---|
| 337 |
|
|---|
| 338 | 07 April 2008
|
|---|
| 339 |
|
|---|
| 340 | Author:
|
|---|
| 341 |
|
|---|
| 342 | FORTRAN77 original version by Lawson, Hanson, Kincaid, Krogh.
|
|---|
| 343 | C version by John Burkardt
|
|---|
| 344 |
|
|---|
| 345 | Reference:
|
|---|
| 346 |
|
|---|
| 347 | Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
|
|---|
| 348 | LINPACK User's Guide,
|
|---|
| 349 | SIAM, 1979,
|
|---|
| 350 | ISBN13: 978-0-898711-72-1,
|
|---|
| 351 | LC: QA214.L56.
|
|---|
| 352 |
|
|---|
| 353 | Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
|
|---|
| 354 | Algorithm 539:
|
|---|
| 355 | Basic Linear Algebra Subprograms for Fortran Usage,
|
|---|
| 356 | ACM Transactions on Mathematical Software,
|
|---|
| 357 | Volume 5, Number 3, September 1979, pages 308-323.
|
|---|
| 358 |
|
|---|
| 359 | Parameters:
|
|---|
| 360 |
|
|---|
| 361 | Input, int N, the number of entries in the vector.
|
|---|
| 362 |
|
|---|
| 363 | Input, float X[*], the vector to be examined.
|
|---|
| 364 |
|
|---|
| 365 | Input, int INCX, the increment between successive entries of SX.
|
|---|
| 366 |
|
|---|
| 367 | Output, int ISAMAX, the index of the element of maximum
|
|---|
| 368 | absolute value.
|
|---|
| 369 | */
|
|---|
| 370 | {
|
|---|
| 371 | float xmax;
|
|---|
| 372 | int i;
|
|---|
| 373 | int ix;
|
|---|
| 374 | int value;
|
|---|
| 375 |
|
|---|
| 376 | value = 0;
|
|---|
| 377 |
|
|---|
| 378 | if ( n < 1 || incx <= 0 )
|
|---|
| 379 | {
|
|---|
| 380 | return value;
|
|---|
| 381 | }
|
|---|
| 382 |
|
|---|
| 383 | value = 1;
|
|---|
| 384 |
|
|---|
| 385 | if ( n == 1 )
|
|---|
| 386 | {
|
|---|
| 387 | return value;
|
|---|
| 388 | }
|
|---|
| 389 |
|
|---|
| 390 | if ( incx == 1 )
|
|---|
| 391 | {
|
|---|
| 392 | xmax = fabs ( x[0] );
|
|---|
| 393 |
|
|---|
| 394 | for ( i = 1; i < n; i++ )
|
|---|
| 395 | {
|
|---|
| 396 | if ( xmax < fabs ( x[i] ) )
|
|---|
| 397 | {
|
|---|
| 398 | value = i + 1;
|
|---|
| 399 | xmax = fabs ( x[i] );
|
|---|
| 400 | }
|
|---|
| 401 | }
|
|---|
| 402 | }
|
|---|
| 403 | else
|
|---|
| 404 | {
|
|---|
| 405 | ix = 0;
|
|---|
| 406 | xmax = fabs ( x[0] );
|
|---|
| 407 | ix = ix + incx;
|
|---|
| 408 |
|
|---|
| 409 | for ( i = 1; i < n; i++ )
|
|---|
| 410 | {
|
|---|
| 411 | if ( xmax < fabs ( x[ix] ) )
|
|---|
| 412 | {
|
|---|
| 413 | value = i + 1;
|
|---|
| 414 | xmax = fabs ( x[ix] );
|
|---|
| 415 | }
|
|---|
| 416 | ix = ix + incx;
|
|---|
| 417 | }
|
|---|
| 418 | }
|
|---|
| 419 |
|
|---|
| 420 | return value;
|
|---|
| 421 | }
|
|---|
| 422 | /*******************************************************************************/
|
|---|
| 423 |
|
|---|
| 424 | void matgen ( int lda, int n, float a[], float x[], float b[] )
|
|---|
| 425 |
|
|---|
| 426 | /*******************************************************************************/
|
|---|
| 427 | /*
|
|---|
| 428 | Purpose:
|
|---|
| 429 |
|
|---|
| 430 | MATGEN generates a "random" matrix for testing.
|
|---|
| 431 |
|
|---|
| 432 | Modified:
|
|---|
| 433 |
|
|---|
| 434 | 27 April 2008
|
|---|
| 435 |
|
|---|
| 436 | Author:
|
|---|
| 437 |
|
|---|
| 438 | John Burkardt
|
|---|
| 439 |
|
|---|
| 440 | Parameters:
|
|---|
| 441 |
|
|---|
| 442 | Input, int LDA, the leading dimension of the matrix.
|
|---|
| 443 |
|
|---|
| 444 | Input, int N, the order of the matrix, and the length of the vector.
|
|---|
| 445 |
|
|---|
| 446 | Output, float A[LDA*N], the matrix.
|
|---|
| 447 |
|
|---|
| 448 | Output, float X[N], the solution vector.
|
|---|
| 449 |
|
|---|
| 450 | Output, float B[N], the right hand side vector.
|
|---|
| 451 | */
|
|---|
| 452 | {
|
|---|
| 453 | int i;
|
|---|
| 454 | int j;
|
|---|
| 455 | int seed;
|
|---|
| 456 | float value;
|
|---|
| 457 |
|
|---|
| 458 | seed = 1325;
|
|---|
| 459 | /*
|
|---|
| 460 | Set the matrix A.
|
|---|
| 461 | */
|
|---|
| 462 | for ( j = 0; j < n; j++ )
|
|---|
| 463 | {
|
|---|
| 464 | for ( i = 0; i < n; i++ )
|
|---|
| 465 | {
|
|---|
| 466 | seed = ( 3125 * seed ) % 65536;
|
|---|
| 467 | value = ( ( float ) seed - 32768.0 ) / 16384.0;
|
|---|
| 468 | a[i+j*lda] = value;
|
|---|
| 469 | }
|
|---|
| 470 | }
|
|---|
| 471 | /*
|
|---|
| 472 | Set x.
|
|---|
| 473 | */
|
|---|
| 474 | for ( i = 0; i < n; i++ )
|
|---|
| 475 | {
|
|---|
| 476 | x[i] = ( float ) ( i + 1 ) / ( ( float ) n );
|
|---|
| 477 | }
|
|---|
| 478 | /*
|
|---|
| 479 | Set b = A * x.
|
|---|
| 480 | */
|
|---|
| 481 | for ( i = 0; i < n; i++ )
|
|---|
| 482 | {
|
|---|
| 483 | b[i] = 0.0;
|
|---|
| 484 | for ( j = 0; j < n; j++ )
|
|---|
| 485 | {
|
|---|
| 486 | b[i] = b[i] + a[i+j*lda] * x[j];
|
|---|
| 487 | }
|
|---|
| 488 | }
|
|---|
| 489 | return;
|
|---|
| 490 | }
|
|---|
| 491 | /******************************************************************************/
|
|---|
| 492 |
|
|---|
| 493 | void msaxpy ( int nr, int nc, float a[], int n, float x[], float y[] )
|
|---|
| 494 |
|
|---|
| 495 | /******************************************************************************/
|
|---|
| 496 | /*
|
|---|
| 497 | Purpose:
|
|---|
| 498 |
|
|---|
| 499 | MSAXPY carries out multiple "SAXPY" operations.
|
|---|
| 500 |
|
|---|
| 501 | Discussion:
|
|---|
| 502 |
|
|---|
| 503 | This routine carries out the step of Gaussian elimination where multiples
|
|---|
| 504 | of the pivot row are added to the rows below the pivot row.
|
|---|
| 505 |
|
|---|
| 506 | A single call to MSAXPY replaces multiple calls to SAXPY.
|
|---|
| 507 |
|
|---|
| 508 | Modified:
|
|---|
| 509 |
|
|---|
| 510 | 07 April 2008
|
|---|
| 511 |
|
|---|
| 512 | Author:
|
|---|
| 513 |
|
|---|
| 514 | Wesley Petersen
|
|---|
| 515 |
|
|---|
| 516 | Parameters:
|
|---|
| 517 |
|
|---|
| 518 | Input, int NR, NC, the number of rows and columns in the matrix.
|
|---|
| 519 |
|
|---|
| 520 | Input, float A[*], ...
|
|---|
| 521 |
|
|---|
| 522 | Input, int N, ...
|
|---|
| 523 |
|
|---|
| 524 | Input, float X[*], ...
|
|---|
| 525 |
|
|---|
| 526 | Output, float Y[*], ...
|
|---|
| 527 | */
|
|---|
| 528 | {
|
|---|
| 529 | int i,j;
|
|---|
| 530 |
|
|---|
| 531 | # pragma omp parallel \
|
|---|
| 532 | shared ( a, nc, nr, x, y ) \
|
|---|
| 533 | private ( i, j )
|
|---|
| 534 |
|
|---|
| 535 | # pragma omp for
|
|---|
| 536 | for ( j = 0; j < nc; j++)
|
|---|
| 537 | {
|
|---|
| 538 | for ( i = 0; i < nr; i++ )
|
|---|
| 539 | {
|
|---|
| 540 | y[i+j*n] += a[j*n] * x[i];
|
|---|
| 541 | }
|
|---|
| 542 | }
|
|---|
| 543 | return;
|
|---|
| 544 | }
|
|---|
| 545 | /******************************************************************************/
|
|---|
| 546 |
|
|---|
| 547 | void msaxpy2 ( int nr, int nc, float a[], int n, float x[], float y[] )
|
|---|
| 548 |
|
|---|
| 549 | /******************************************************************************/
|
|---|
| 550 | /*
|
|---|
| 551 | Purpose:
|
|---|
| 552 |
|
|---|
| 553 | MSAXPY2 carries out multiple "SAXPY" operations.
|
|---|
| 554 |
|
|---|
| 555 | Discussion:
|
|---|
| 556 |
|
|---|
| 557 | This routine carries out the step of Gaussian elimination where multiples
|
|---|
| 558 | of the pivot row are added to the rows below the pivot row.
|
|---|
| 559 |
|
|---|
| 560 | A single call to MSAXPY replaces multiple calls to SAXPY.
|
|---|
| 561 |
|
|---|
| 562 | Modified:
|
|---|
| 563 |
|
|---|
| 564 | 07 April 2008
|
|---|
| 565 |
|
|---|
| 566 | Author:
|
|---|
| 567 |
|
|---|
| 568 | Wesley Petersen
|
|---|
| 569 |
|
|---|
| 570 | Parameters:
|
|---|
| 571 |
|
|---|
| 572 | Input, int NR, NC, the number of rows and columns in the matrix.
|
|---|
| 573 |
|
|---|
| 574 | Input, float A[*], ...
|
|---|
| 575 |
|
|---|
| 576 | Input, int N, ...
|
|---|
| 577 |
|
|---|
| 578 | Input, float X[*], ...
|
|---|
| 579 |
|
|---|
| 580 | Output, float Y[*], ...
|
|---|
| 581 | */
|
|---|
| 582 | {
|
|---|
| 583 | int i,j;
|
|---|
| 584 |
|
|---|
| 585 | for ( j = 0; j < nc; j++)
|
|---|
| 586 | {
|
|---|
| 587 | for ( i = 0; i < nr; i++ )
|
|---|
| 588 | {
|
|---|
| 589 | y[i+j*n] += a[j*n] * x[i];
|
|---|
| 590 | }
|
|---|
| 591 | }
|
|---|
| 592 | return;
|
|---|
| 593 | }
|
|---|
| 594 | /******************************************************************************/
|
|---|
| 595 |
|
|---|
| 596 | int msgefa ( float a[], int lda, int n, int ipvt[] )
|
|---|
| 597 |
|
|---|
| 598 | /******************************************************************************/
|
|---|
| 599 | /*
|
|---|
| 600 | Purpose:
|
|---|
| 601 |
|
|---|
| 602 | MSGEFA factors a matrix by gaussian elimination.
|
|---|
| 603 |
|
|---|
| 604 | Discussion:
|
|---|
| 605 |
|
|---|
| 606 | Matrix references which would, mathematically, be written A(I,J)
|
|---|
| 607 | must be written here as:
|
|---|
| 608 | * A[I+J*LDA], when the value is needed, or
|
|---|
| 609 | * A+I+J*LDA, when the address is needed.
|
|---|
| 610 |
|
|---|
| 611 | This variant of SGEFA uses OpenMP for improved parallel execution.
|
|---|
| 612 | The step in which multiples of the pivot row are added to individual
|
|---|
| 613 | rows has been replaced by a single call which updates the entire
|
|---|
| 614 | matrix sub-block.
|
|---|
| 615 |
|
|---|
| 616 | Modified:
|
|---|
| 617 |
|
|---|
| 618 | 07 March 2008
|
|---|
| 619 |
|
|---|
| 620 | Author:
|
|---|
| 621 |
|
|---|
| 622 | FORTRAN77 original version by Cleve Moler.
|
|---|
| 623 | C version by Wesley Petersen.
|
|---|
| 624 |
|
|---|
| 625 | Reference:
|
|---|
| 626 |
|
|---|
| 627 | Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
|
|---|
| 628 | LINPACK User's Guide,
|
|---|
| 629 | SIAM, 1979,
|
|---|
| 630 | ISBN13: 978-0-898711-72-1,
|
|---|
| 631 | LC: QA214.L56.
|
|---|
| 632 |
|
|---|
| 633 | Parameters:
|
|---|
| 634 |
|
|---|
| 635 | Input/output, float A[LDA*N]. On input, the matrix to be factored.
|
|---|
| 636 | On output, an upper triangular matrix and the multipliers which were
|
|---|
| 637 | used to obtain it. The factorization can be written A = L * U where
|
|---|
| 638 | L is a product of permutation and unit lower triangular matrices and
|
|---|
| 639 | U is upper triangular.
|
|---|
| 640 |
|
|---|
| 641 | Input, int LDA, the leading dimension of the matrix.
|
|---|
| 642 |
|
|---|
| 643 | Input, int N, the order of the matrix.
|
|---|
| 644 |
|
|---|
| 645 | Output, int IPVT[N], the pivot indices.
|
|---|
| 646 |
|
|---|
| 647 | Output, int MSGEFA, indicates singularity.
|
|---|
| 648 | If 0, this is the normal value, and the algorithm succeeded.
|
|---|
| 649 | If K, then on the K-th elimination step, a zero pivot was encountered.
|
|---|
| 650 | The matrix is numerically not invertible.
|
|---|
| 651 | */
|
|---|
| 652 | {
|
|---|
| 653 | int info;
|
|---|
| 654 | int k,kp1,l,nm1;
|
|---|
| 655 | float t;
|
|---|
| 656 |
|
|---|
| 657 | info = 0;
|
|---|
| 658 | nm1 = n - 1;
|
|---|
| 659 | for ( k = 0; k < nm1; k++ )
|
|---|
| 660 | {
|
|---|
| 661 | kp1 = k + 1;
|
|---|
| 662 | l = isamax ( n-k, a+k+k*lda, 1 ) + k - 1;
|
|---|
| 663 | ipvt[k] = l + 1;
|
|---|
| 664 |
|
|---|
| 665 | if ( a[l+k*lda] == 0.0 )
|
|---|
| 666 | {
|
|---|
| 667 | info = k + 1;
|
|---|
| 668 | return info;
|
|---|
| 669 | }
|
|---|
| 670 |
|
|---|
| 671 | if ( l != k )
|
|---|
| 672 | {
|
|---|
| 673 | t = a[l+k*lda];
|
|---|
| 674 | a[l+k*lda] = a[k+k*lda];
|
|---|
| 675 | a[k+k*lda] = t;
|
|---|
| 676 | }
|
|---|
| 677 | t = -1.0 / a[k+k*lda];
|
|---|
| 678 | sscal ( n-k-1, t, a+kp1+k*lda, 1 );
|
|---|
| 679 | /*
|
|---|
| 680 | Interchange the pivot row and the K-th row.
|
|---|
| 681 | */
|
|---|
| 682 | if ( l != k )
|
|---|
| 683 | {
|
|---|
| 684 | sswap ( n-k-1, a+l+kp1*lda, lda, a+k+kp1*lda, lda );
|
|---|
| 685 | }
|
|---|
| 686 | /*
|
|---|
| 687 | Add multiples of the K-th row to rows K+1 through N.
|
|---|
| 688 | */
|
|---|
| 689 | msaxpy ( n-k-1, n-k-1, a+k+kp1*lda, n, a+kp1+k*lda, a+kp1+kp1*lda );
|
|---|
| 690 | }
|
|---|
| 691 |
|
|---|
| 692 | ipvt[n-1] = n;
|
|---|
| 693 |
|
|---|
| 694 | if ( a[n-1+(n-1)*lda] == 0.0 )
|
|---|
| 695 | {
|
|---|
| 696 | info = n;
|
|---|
| 697 | }
|
|---|
| 698 |
|
|---|
| 699 | return info;
|
|---|
| 700 | }
|
|---|
| 701 | /******************************************************************************/
|
|---|
| 702 |
|
|---|
| 703 | int msgefa2 ( float a[], int lda, int n, int ipvt[] )
|
|---|
| 704 |
|
|---|
| 705 | /******************************************************************************/
|
|---|
| 706 | /*
|
|---|
| 707 | Purpose:
|
|---|
| 708 |
|
|---|
| 709 | MSGEFA2 factors a matrix by gaussian elimination.
|
|---|
| 710 |
|
|---|
| 711 | Discussion:
|
|---|
| 712 |
|
|---|
| 713 | Matrix references which would, mathematically, be written A(I,J)
|
|---|
| 714 | must be written here as:
|
|---|
| 715 | * A[I+J*LDA], when the value is needed, or
|
|---|
| 716 | * A+I+J*LDA, when the address is needed.
|
|---|
| 717 |
|
|---|
| 718 | This variant of SGEFA uses OpenMP for improved parallel execution.
|
|---|
| 719 | The step in which multiples of the pivot row are added to individual
|
|---|
| 720 | rows has been replaced by a single call which updates the entire
|
|---|
| 721 | matrix sub-block.
|
|---|
| 722 |
|
|---|
| 723 | Modified:
|
|---|
| 724 |
|
|---|
| 725 | 07 March 2008
|
|---|
| 726 |
|
|---|
| 727 | Author:
|
|---|
| 728 |
|
|---|
| 729 | FORTRAN77 original version by Cleve Moler.
|
|---|
| 730 | C version by Wesley Petersen.
|
|---|
| 731 |
|
|---|
| 732 | Reference:
|
|---|
| 733 |
|
|---|
| 734 | Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
|
|---|
| 735 | LINPACK User's Guide,
|
|---|
| 736 | SIAM, 1979,
|
|---|
| 737 | ISBN13: 978-0-898711-72-1,
|
|---|
| 738 | LC: QA214.L56.
|
|---|
| 739 |
|
|---|
| 740 | Parameters:
|
|---|
| 741 |
|
|---|
| 742 | Input/output, float A[LDA*N]. On input, the matrix to be factored.
|
|---|
| 743 | On output, an upper triangular matrix and the multipliers which were
|
|---|
| 744 | used to obtain it. The factorization can be written A = L * U where
|
|---|
| 745 | L is a product of permutation and unit lower triangular matrices and
|
|---|
| 746 | U is upper triangular.
|
|---|
| 747 |
|
|---|
| 748 | Input, int LDA, the leading dimension of the matrix.
|
|---|
| 749 |
|
|---|
| 750 | Input, int N, the order of the matrix.
|
|---|
| 751 |
|
|---|
| 752 | Output, int IPVT[N], the pivot indices.
|
|---|
| 753 |
|
|---|
| 754 | Output, int MSGEFA, indicates singularity.
|
|---|
| 755 | If 0, this is the normal value, and the algorithm succeeded.
|
|---|
| 756 | If K, then on the K-th elimination step, a zero pivot was encountered.
|
|---|
| 757 | The matrix is numerically not invertible.
|
|---|
| 758 | */
|
|---|
| 759 | {
|
|---|
| 760 | int info;
|
|---|
| 761 | int k,kp1,l,nm1;
|
|---|
| 762 | float t;
|
|---|
| 763 |
|
|---|
| 764 | info = 0;
|
|---|
| 765 | nm1 = n - 1;
|
|---|
| 766 | for ( k = 0; k < nm1; k++ )
|
|---|
| 767 | {
|
|---|
| 768 | kp1 = k + 1;
|
|---|
| 769 | l = isamax ( n-k, a+k+k*lda, 1 ) + k - 1;
|
|---|
| 770 | ipvt[k] = l + 1;
|
|---|
| 771 |
|
|---|
| 772 | if ( a[l+k*lda] == 0.0 )
|
|---|
| 773 | {
|
|---|
| 774 | info = k + 1;
|
|---|
| 775 | return info;
|
|---|
| 776 | }
|
|---|
| 777 |
|
|---|
| 778 | if ( l != k )
|
|---|
| 779 | {
|
|---|
| 780 | t = a[l+k*lda];
|
|---|
| 781 | a[l+k*lda] = a[k+k*lda];
|
|---|
| 782 | a[k+k*lda] = t;
|
|---|
| 783 | }
|
|---|
| 784 | t = -1.0 / a[k+k*lda];
|
|---|
| 785 | sscal ( n-k-1, t, a+kp1+k*lda, 1 );
|
|---|
| 786 | /*
|
|---|
| 787 | Interchange the pivot row and the K-th row.
|
|---|
| 788 | */
|
|---|
| 789 | if ( l != k )
|
|---|
| 790 | {
|
|---|
| 791 | sswap ( n-k-1, a+l+kp1*lda, lda, a+k+kp1*lda, lda );
|
|---|
| 792 | }
|
|---|
| 793 | /*
|
|---|
| 794 | Add multiples of the K-th row to rows K+1 through N.
|
|---|
| 795 | */
|
|---|
| 796 | msaxpy2 ( n-k-1, n-k-1, a+k+kp1*lda, n, a+kp1+k*lda, a+kp1+kp1*lda );
|
|---|
| 797 | }
|
|---|
| 798 |
|
|---|
| 799 | ipvt[n-1] = n;
|
|---|
| 800 |
|
|---|
| 801 | if ( a[n-1+(n-1)*lda] == 0.0 )
|
|---|
| 802 | {
|
|---|
| 803 | info = n;
|
|---|
| 804 | }
|
|---|
| 805 |
|
|---|
| 806 | return info;
|
|---|
| 807 | }
|
|---|
| 808 | /******************************************************************************/
|
|---|
| 809 |
|
|---|
| 810 | void saxpy ( int n, float a, float x[], int incx, float y[], int incy )
|
|---|
| 811 |
|
|---|
| 812 | /******************************************************************************/
|
|---|
| 813 | /*
|
|---|
| 814 | Purpose:
|
|---|
| 815 |
|
|---|
| 816 | SAXPY computes float constant times a vector plus a vector.
|
|---|
| 817 |
|
|---|
| 818 | Discussion:
|
|---|
| 819 |
|
|---|
| 820 | This routine uses unrolled loops for increments equal to one.
|
|---|
| 821 |
|
|---|
| 822 | Modified:
|
|---|
| 823 |
|
|---|
| 824 | 23 February 2006
|
|---|
| 825 |
|
|---|
| 826 | Author:
|
|---|
| 827 |
|
|---|
| 828 | FORTRAN77 original version by Dongarra, Moler, Bunch, Stewart.
|
|---|
| 829 | C version by John Burkardt
|
|---|
| 830 |
|
|---|
| 831 | Reference:
|
|---|
| 832 |
|
|---|
| 833 | Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
|
|---|
| 834 | LINPACK User's Guide,
|
|---|
| 835 | SIAM, 1979,
|
|---|
| 836 | ISBN13: 978-0-898711-72-1,
|
|---|
| 837 | LC: QA214.L56.
|
|---|
| 838 |
|
|---|
| 839 | Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
|
|---|
| 840 | Basic Linear Algebra Subprograms for Fortran Usage,
|
|---|
| 841 | Algorithm 539,
|
|---|
| 842 | ACM Transactions on Mathematical Software,
|
|---|
| 843 | Volume 5, Number 3, September 1979, pages 308-323.
|
|---|
| 844 |
|
|---|
| 845 | Parameters:
|
|---|
| 846 |
|
|---|
| 847 | Input, int N, the number of elements in X and Y.
|
|---|
| 848 |
|
|---|
| 849 | Input, float A, the multiplier of X.
|
|---|
| 850 |
|
|---|
| 851 | Input, float X[*], the first vector.
|
|---|
| 852 |
|
|---|
| 853 | Input, int INCX, the increment between successive entries of X.
|
|---|
| 854 |
|
|---|
| 855 | Input/output, float Y[*], the second vector.
|
|---|
| 856 | On output, Y[*] has been replaced by Y[*] + A * X[*].
|
|---|
| 857 |
|
|---|
| 858 | Input, int INCY, the increment between successive entries of Y.
|
|---|
| 859 | */
|
|---|
| 860 | {
|
|---|
| 861 | int i;
|
|---|
| 862 | int ix;
|
|---|
| 863 | int iy;
|
|---|
| 864 | int m;
|
|---|
| 865 |
|
|---|
| 866 | if ( n <= 0 )
|
|---|
| 867 | {
|
|---|
| 868 | return;
|
|---|
| 869 | }
|
|---|
| 870 |
|
|---|
| 871 | if ( a == 0.0 )
|
|---|
| 872 | {
|
|---|
| 873 | return;
|
|---|
| 874 | }
|
|---|
| 875 | /*
|
|---|
| 876 | Code for unequal increments or equal increments
|
|---|
| 877 | not equal to 1.
|
|---|
| 878 | */
|
|---|
| 879 | if ( incx != 1 || incy != 1 )
|
|---|
| 880 | {
|
|---|
| 881 | if ( 0 <= incx )
|
|---|
| 882 | {
|
|---|
| 883 | ix = 0;
|
|---|
| 884 | }
|
|---|
| 885 | else
|
|---|
| 886 | {
|
|---|
| 887 | ix = ( - n + 1 ) * incx;
|
|---|
| 888 | }
|
|---|
| 889 |
|
|---|
| 890 | if ( 0 <= incy )
|
|---|
| 891 | {
|
|---|
| 892 | iy = 0;
|
|---|
| 893 | }
|
|---|
| 894 | else
|
|---|
| 895 | {
|
|---|
| 896 | iy = ( - n + 1 ) * incy;
|
|---|
| 897 | }
|
|---|
| 898 |
|
|---|
| 899 | for ( i = 0; i < n; i++ )
|
|---|
| 900 | {
|
|---|
| 901 | y[iy] = y[iy] + a * x[ix];
|
|---|
| 902 | ix = ix + incx;
|
|---|
| 903 | iy = iy + incy;
|
|---|
| 904 | }
|
|---|
| 905 | }
|
|---|
| 906 | /*
|
|---|
| 907 | Code for both increments equal to 1.
|
|---|
| 908 | */
|
|---|
| 909 | else
|
|---|
| 910 | {
|
|---|
| 911 | m = n % 4;
|
|---|
| 912 |
|
|---|
| 913 | for ( i = 0; i < m; i++ )
|
|---|
| 914 | {
|
|---|
| 915 | y[i] = y[i] + a * x[i];
|
|---|
| 916 | }
|
|---|
| 917 |
|
|---|
| 918 | for ( i = m; i < n; i = i + 4 )
|
|---|
| 919 | {
|
|---|
| 920 | y[i ] = y[i ] + a * x[i ];
|
|---|
| 921 | y[i+1] = y[i+1] + a * x[i+1];
|
|---|
| 922 | y[i+2] = y[i+2] + a * x[i+2];
|
|---|
| 923 | y[i+3] = y[i+3] + a * x[i+3];
|
|---|
| 924 | }
|
|---|
| 925 | }
|
|---|
| 926 |
|
|---|
| 927 | return;
|
|---|
| 928 | }
|
|---|
| 929 | /******************************************************************************/
|
|---|
| 930 |
|
|---|
| 931 | float sdot ( int n, float x[], int incx, float y[], int incy )
|
|---|
| 932 |
|
|---|
| 933 | /******************************************************************************/
|
|---|
| 934 | /*
|
|---|
| 935 | Purpose:
|
|---|
| 936 |
|
|---|
| 937 | SDOT forms the dot product of two vectors.
|
|---|
| 938 |
|
|---|
| 939 | Discussion:
|
|---|
| 940 |
|
|---|
| 941 | This routine uses unrolled loops for increments equal to one.
|
|---|
| 942 |
|
|---|
| 943 | Modified:
|
|---|
| 944 |
|
|---|
| 945 | 23 February 2006
|
|---|
| 946 |
|
|---|
| 947 | Author:
|
|---|
| 948 |
|
|---|
| 949 | FORTRAN77 original version by Dongarra, Moler, Bunch, Stewart
|
|---|
| 950 | C version by John Burkardt
|
|---|
| 951 |
|
|---|
| 952 | Reference:
|
|---|
| 953 |
|
|---|
| 954 | Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
|
|---|
| 955 | LINPACK User's Guide,
|
|---|
| 956 | SIAM, 1979.
|
|---|
| 957 |
|
|---|
| 958 | Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
|
|---|
| 959 | Basic Linear Algebra Subprograms for Fortran Usage,
|
|---|
| 960 | Algorithm 539,
|
|---|
| 961 | ACM Transactions on Mathematical Software,
|
|---|
| 962 | Volume 5, Number 3, September 1979, pages 308-323.
|
|---|
| 963 |
|
|---|
| 964 | Parameters:
|
|---|
| 965 |
|
|---|
| 966 | Input, int N, the number of entries in the vectors.
|
|---|
| 967 |
|
|---|
| 968 | Input, float X[*], the first vector.
|
|---|
| 969 |
|
|---|
| 970 | Input, int INCX, the increment between successive entries in X.
|
|---|
| 971 |
|
|---|
| 972 | Input, float Y[*], the second vector.
|
|---|
| 973 |
|
|---|
| 974 | Input, int INCY, the increment between successive entries in Y.
|
|---|
| 975 |
|
|---|
| 976 | Output, float SDOT, the sum of the product of the corresponding
|
|---|
| 977 | entries of X and Y.
|
|---|
| 978 | */
|
|---|
| 979 | {
|
|---|
| 980 | int i;
|
|---|
| 981 | int ix;
|
|---|
| 982 | int iy;
|
|---|
| 983 | int m;
|
|---|
| 984 | float temp;
|
|---|
| 985 |
|
|---|
| 986 | temp = 0.0;
|
|---|
| 987 |
|
|---|
| 988 | if ( n <= 0 )
|
|---|
| 989 | {
|
|---|
| 990 | return temp;
|
|---|
| 991 | }
|
|---|
| 992 | /*
|
|---|
| 993 | Code for unequal increments or equal increments
|
|---|
| 994 | not equal to 1.
|
|---|
| 995 | */
|
|---|
| 996 | if ( incx != 1 || incy != 1 )
|
|---|
| 997 | {
|
|---|
| 998 | if ( 0 <= incx )
|
|---|
| 999 | {
|
|---|
| 1000 | ix = 0;
|
|---|
| 1001 | }
|
|---|
| 1002 | else
|
|---|
| 1003 | {
|
|---|
| 1004 | ix = ( - n + 1 ) * incx;
|
|---|
| 1005 | }
|
|---|
| 1006 |
|
|---|
| 1007 | if ( 0 <= incy )
|
|---|
| 1008 | {
|
|---|
| 1009 | iy = 0;
|
|---|
| 1010 | }
|
|---|
| 1011 | else
|
|---|
| 1012 | {
|
|---|
| 1013 | iy = ( - n + 1 ) * incy;
|
|---|
| 1014 | }
|
|---|
| 1015 |
|
|---|
| 1016 | for ( i = 0; i < n; i++ )
|
|---|
| 1017 | {
|
|---|
| 1018 | temp = temp + x[ix] * y[iy];
|
|---|
| 1019 | ix = ix + incx;
|
|---|
| 1020 | iy = iy + incy;
|
|---|
| 1021 | }
|
|---|
| 1022 | }
|
|---|
| 1023 | /*
|
|---|
| 1024 | Code for both increments equal to 1.
|
|---|
| 1025 | */
|
|---|
| 1026 | else
|
|---|
| 1027 | {
|
|---|
| 1028 | m = n % 5;
|
|---|
| 1029 |
|
|---|
| 1030 | for ( i = 0; i < m; i++ )
|
|---|
| 1031 | {
|
|---|
| 1032 | temp = temp + x[i] * y[i];
|
|---|
| 1033 | }
|
|---|
| 1034 |
|
|---|
| 1035 | for ( i = m; i < n; i = i + 5 )
|
|---|
| 1036 | {
|
|---|
| 1037 | temp = temp + x[i ] * y[i ]
|
|---|
| 1038 | + x[i+1] * y[i+1]
|
|---|
| 1039 | + x[i+2] * y[i+2]
|
|---|
| 1040 | + x[i+3] * y[i+3]
|
|---|
| 1041 | + x[i+4] * y[i+4];
|
|---|
| 1042 | }
|
|---|
| 1043 | }
|
|---|
| 1044 |
|
|---|
| 1045 | return temp;
|
|---|
| 1046 | }
|
|---|
| 1047 | /*******************************************************************************/
|
|---|
| 1048 |
|
|---|
| 1049 | int sgefa ( float a[], int lda, int n, int ipvt[] )
|
|---|
| 1050 |
|
|---|
| 1051 | /*******************************************************************************/
|
|---|
| 1052 | /*
|
|---|
| 1053 | Purpose:
|
|---|
| 1054 |
|
|---|
| 1055 | SGEFA factors a matrix by gaussian elimination.
|
|---|
| 1056 |
|
|---|
| 1057 | Discussion:
|
|---|
| 1058 |
|
|---|
| 1059 | Matrix references which would, mathematically, be written A(I,J)
|
|---|
| 1060 | must be written here as:
|
|---|
| 1061 | * A[I+J*LDA], when the value is needed, or
|
|---|
| 1062 | * A+I+J*LDA, when the address is needed.
|
|---|
| 1063 |
|
|---|
| 1064 | Modified:
|
|---|
| 1065 |
|
|---|
| 1066 | 07 March 2008
|
|---|
| 1067 |
|
|---|
| 1068 | Author:
|
|---|
| 1069 |
|
|---|
| 1070 | FORTRAN77 original version by Cleve Moler.
|
|---|
| 1071 | C version by John Burkardt.
|
|---|
| 1072 |
|
|---|
| 1073 | Reference:
|
|---|
| 1074 |
|
|---|
| 1075 | Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
|
|---|
| 1076 | LINPACK User's Guide,
|
|---|
| 1077 | SIAM, 1979,
|
|---|
| 1078 | ISBN13: 978-0-898711-72-1,
|
|---|
| 1079 | LC: QA214.L56.
|
|---|
| 1080 |
|
|---|
| 1081 | Parameters:
|
|---|
| 1082 |
|
|---|
| 1083 | Input/output, float A[LDA*N]. On input, the matrix to be factored.
|
|---|
| 1084 | On output, an upper triangular matrix and the multipliers which were
|
|---|
| 1085 | used to obtain it. The factorization can be written A = L * U where
|
|---|
| 1086 | L is a product of permutation and unit lower triangular matrices and
|
|---|
| 1087 | U is upper triangular.
|
|---|
| 1088 |
|
|---|
| 1089 | Input, int LDA, the leading dimension of the matrix.
|
|---|
| 1090 |
|
|---|
| 1091 | Input, int N, the order of the matrix.
|
|---|
| 1092 |
|
|---|
| 1093 | Output, int IPVT[N], the pivot indices.
|
|---|
| 1094 |
|
|---|
| 1095 | Output, int SGEFA, indicates singularity.
|
|---|
| 1096 | If 0, this is the normal value, and the algorithm succeeded.
|
|---|
| 1097 | If K, then on the K-th elimination step, a zero pivot was encountered.
|
|---|
| 1098 | The matrix is numerically not invertible.
|
|---|
| 1099 | */
|
|---|
| 1100 | {
|
|---|
| 1101 | int j;
|
|---|
| 1102 | int info;
|
|---|
| 1103 | int k;
|
|---|
| 1104 | int kp1;
|
|---|
| 1105 | int l;
|
|---|
| 1106 | int nm1;
|
|---|
| 1107 | float t;
|
|---|
| 1108 |
|
|---|
| 1109 | info = 0;
|
|---|
| 1110 |
|
|---|
| 1111 | for ( k = 1; k <= n - 1; k++ )
|
|---|
| 1112 | {
|
|---|
| 1113 | /*
|
|---|
| 1114 | Find l = pivot index.
|
|---|
| 1115 | */
|
|---|
| 1116 | l = isamax ( n-k+1, &a[k-1+(k-1)*lda], 1 ) + k - 1;
|
|---|
| 1117 | ipvt[k-1] = l;
|
|---|
| 1118 | /*
|
|---|
| 1119 | Zero pivot implies this column already triangularized.
|
|---|
| 1120 | */
|
|---|
| 1121 | if ( a[l-1+(k-1)*lda] != 0.0 )
|
|---|
| 1122 | {
|
|---|
| 1123 | /*
|
|---|
| 1124 | Interchange if necessary.
|
|---|
| 1125 | */
|
|---|
| 1126 | if ( l != k )
|
|---|
| 1127 | {
|
|---|
| 1128 | t = a[l-1+(k-1)*lda];
|
|---|
| 1129 | a[l-1+(k-1)*lda] = a[k-1+(k-1)*lda];
|
|---|
| 1130 | a[k-1+(k-1)*lda] = t;
|
|---|
| 1131 | }
|
|---|
| 1132 | /*
|
|---|
| 1133 | Compute multipliers.
|
|---|
| 1134 | */
|
|---|
| 1135 | t = - 1.0 / a[k-1+(k-1)*lda];
|
|---|
| 1136 | sscal ( n-k, t, &a[k+(k-1)*lda], 1 );
|
|---|
| 1137 | /*
|
|---|
| 1138 | Row elimination with column indexing.
|
|---|
| 1139 | */
|
|---|
| 1140 | for ( j = k + 1; j <= n; j++ )
|
|---|
| 1141 | {
|
|---|
| 1142 | t = a[l-1+(j-1)*lda];
|
|---|
| 1143 | if (l != k)
|
|---|
| 1144 | {
|
|---|
| 1145 | a[l-1+(j-1)*lda] = a[k-1+(j-1)*lda];
|
|---|
| 1146 | a[k-1+(j-1)*lda] = t;
|
|---|
| 1147 | }
|
|---|
| 1148 | saxpy ( n-k, t, &a[k+(k-1)*lda], 1, &a[k+(j-1)*lda], 1 );
|
|---|
| 1149 | }
|
|---|
| 1150 | }
|
|---|
| 1151 | else
|
|---|
| 1152 | {
|
|---|
| 1153 | info = k;
|
|---|
| 1154 | }
|
|---|
| 1155 | }
|
|---|
| 1156 | ipvt[n-1] = n;
|
|---|
| 1157 |
|
|---|
| 1158 | if (a[n-1+(n-1)*lda] == 0.0 )
|
|---|
| 1159 | {
|
|---|
| 1160 | info = n - 1;
|
|---|
| 1161 | }
|
|---|
| 1162 | return info;
|
|---|
| 1163 | }
|
|---|
| 1164 | /******************************************************************************/
|
|---|
| 1165 |
|
|---|
| 1166 | void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job )
|
|---|
| 1167 |
|
|---|
| 1168 | /******************************************************************************/
|
|---|
| 1169 | /*
|
|---|
| 1170 | Purpose:
|
|---|
| 1171 |
|
|---|
| 1172 | SGESL solves a real general linear system A * X = B.
|
|---|
| 1173 |
|
|---|
| 1174 | Discussion:
|
|---|
| 1175 |
|
|---|
| 1176 | SGESL can solve either of the systems A * X = B or A' * X = B.
|
|---|
| 1177 |
|
|---|
| 1178 | The system matrix must have been factored by SGECO or SGEFA.
|
|---|
| 1179 |
|
|---|
| 1180 | A division by zero will occur if the input factor contains a
|
|---|
| 1181 | zero on the diagonal. Technically this indicates singularity
|
|---|
| 1182 | but it is often caused by improper arguments or improper
|
|---|
| 1183 | setting of LDA. It will not occur if the subroutines are
|
|---|
| 1184 | called correctly and if SGECO has set 0.0 < RCOND
|
|---|
| 1185 | or SGEFA has set INFO == 0.
|
|---|
| 1186 |
|
|---|
| 1187 | Modified:
|
|---|
| 1188 |
|
|---|
| 1189 | 04 April 2006
|
|---|
| 1190 |
|
|---|
| 1191 | Author:
|
|---|
| 1192 |
|
|---|
| 1193 | FORTRAN77 original by Dongarra, Moler, Bunch and Stewart.
|
|---|
| 1194 | C translation by John Burkardt.
|
|---|
| 1195 |
|
|---|
| 1196 | Reference:
|
|---|
| 1197 |
|
|---|
| 1198 | Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
|
|---|
| 1199 | LINPACK User's Guide,
|
|---|
| 1200 | SIAM, (Society for Industrial and Applied Mathematics),
|
|---|
| 1201 | 3600 University City Science Center,
|
|---|
| 1202 | Philadelphia, PA, 19104-2688.
|
|---|
| 1203 | ISBN: 0-89871-172-X
|
|---|
| 1204 |
|
|---|
| 1205 | Parameters:
|
|---|
| 1206 |
|
|---|
| 1207 | Input, float A[LDA*N], the output from SGECO or SGEFA.
|
|---|
| 1208 |
|
|---|
| 1209 | Input, int LDA, the leading dimension of A.
|
|---|
| 1210 |
|
|---|
| 1211 | Input, int N, the order of the matrix A.
|
|---|
| 1212 |
|
|---|
| 1213 | Input, int IPVT[N], the pivot vector from SGECO or SGEFA.
|
|---|
| 1214 |
|
|---|
| 1215 | Input/output, float B[N].
|
|---|
| 1216 | On input, the right hand side vector.
|
|---|
| 1217 | On output, the solution vector.
|
|---|
| 1218 |
|
|---|
| 1219 | Input, int JOB.
|
|---|
| 1220 | 0, solve A * X = B;
|
|---|
| 1221 | nonzero, solve A' * X = B.
|
|---|
| 1222 | */
|
|---|
| 1223 | {
|
|---|
| 1224 | int k;
|
|---|
| 1225 | int l;
|
|---|
| 1226 | float t;
|
|---|
| 1227 | /*
|
|---|
| 1228 | Solve A * X = B.
|
|---|
| 1229 | */
|
|---|
| 1230 | if ( job == 0 )
|
|---|
| 1231 | {
|
|---|
| 1232 | for ( k = 1; k <= n-1; k++ )
|
|---|
| 1233 | {
|
|---|
| 1234 | l = ipvt[k-1];
|
|---|
| 1235 | t = b[l-1];
|
|---|
| 1236 |
|
|---|
| 1237 | if ( l != k )
|
|---|
| 1238 | {
|
|---|
| 1239 | b[l-1] = b[k-1];
|
|---|
| 1240 | b[k-1] = t;
|
|---|
| 1241 | }
|
|---|
| 1242 | saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 );
|
|---|
| 1243 | }
|
|---|
| 1244 |
|
|---|
| 1245 | for ( k = n; 1 <= k; k-- )
|
|---|
| 1246 | {
|
|---|
| 1247 | b[k-1] = b[k-1] / a[k-1+(k-1)*lda];
|
|---|
| 1248 | t = -b[k-1];
|
|---|
| 1249 | saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 );
|
|---|
| 1250 | }
|
|---|
| 1251 | }
|
|---|
| 1252 | /*
|
|---|
| 1253 | Solve A' * X = B.
|
|---|
| 1254 | */
|
|---|
| 1255 | else
|
|---|
| 1256 | {
|
|---|
| 1257 | for ( k = 1; k <= n; k++ )
|
|---|
| 1258 | {
|
|---|
| 1259 | t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 );
|
|---|
| 1260 | b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda];
|
|---|
| 1261 | }
|
|---|
| 1262 |
|
|---|
| 1263 | for ( k = n-1; 1 <= k; k-- )
|
|---|
| 1264 | {
|
|---|
| 1265 | b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 );
|
|---|
| 1266 | l = ipvt[k-1];
|
|---|
| 1267 |
|
|---|
| 1268 | if ( l != k )
|
|---|
| 1269 | {
|
|---|
| 1270 | t = b[l-1];
|
|---|
| 1271 | b[l-1] = b[k-1];
|
|---|
| 1272 | b[k-1] = t;
|
|---|
| 1273 | }
|
|---|
| 1274 | }
|
|---|
| 1275 | }
|
|---|
| 1276 | return;
|
|---|
| 1277 | }
|
|---|
| 1278 | /******************************************************************************/
|
|---|
| 1279 |
|
|---|
| 1280 | void sscal ( int n, float sa, float x[], int incx )
|
|---|
| 1281 |
|
|---|
| 1282 | /******************************************************************************/
|
|---|
| 1283 | /*
|
|---|
| 1284 | Purpose:
|
|---|
| 1285 |
|
|---|
| 1286 | SSCAL scales a float vector by a constant.
|
|---|
| 1287 |
|
|---|
| 1288 | Modified:
|
|---|
| 1289 |
|
|---|
| 1290 | 23 February 2006
|
|---|
| 1291 |
|
|---|
| 1292 | Author:
|
|---|
| 1293 |
|
|---|
| 1294 | Jack Dongarra
|
|---|
| 1295 | C version by John Burkardt
|
|---|
| 1296 |
|
|---|
| 1297 | Reference:
|
|---|
| 1298 |
|
|---|
| 1299 | Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
|
|---|
| 1300 | LINPACK User's Guide,
|
|---|
| 1301 | SIAM, 1979,
|
|---|
| 1302 | ISBN13: 978-0-898711-72-1,
|
|---|
| 1303 | LC: QA214.L56.
|
|---|
| 1304 |
|
|---|
| 1305 | Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
|
|---|
| 1306 | Basic Linear Algebra Subprograms for Fortran Usage,
|
|---|
| 1307 | Algorithm 539,
|
|---|
| 1308 | ACM Transactions on Mathematical Software,
|
|---|
| 1309 | Volume 5, Number 3, September 1979, pages 308-323.
|
|---|
| 1310 |
|
|---|
| 1311 | Parameters:
|
|---|
| 1312 |
|
|---|
| 1313 | Input, int N, the number of entries in the vector.
|
|---|
| 1314 |
|
|---|
| 1315 | Input, float SA, the multiplier.
|
|---|
| 1316 |
|
|---|
| 1317 | Input/output, float X[*], the vector to be scaled.
|
|---|
| 1318 |
|
|---|
| 1319 | Input, int INCX, the increment between successive entries of X.
|
|---|
| 1320 | */
|
|---|
| 1321 | {
|
|---|
| 1322 | int i;
|
|---|
| 1323 | int ix;
|
|---|
| 1324 | int m;
|
|---|
| 1325 |
|
|---|
| 1326 | if ( n <= 0 )
|
|---|
| 1327 | {
|
|---|
| 1328 | }
|
|---|
| 1329 | else if ( incx == 1 )
|
|---|
| 1330 | {
|
|---|
| 1331 | m = n % 5;
|
|---|
| 1332 |
|
|---|
| 1333 | for ( i = 0; i < m; i++ )
|
|---|
| 1334 | {
|
|---|
| 1335 | x[i] = sa * x[i];
|
|---|
| 1336 | }
|
|---|
| 1337 |
|
|---|
| 1338 | for ( i = m; i < n; i = i + 5 )
|
|---|
| 1339 | {
|
|---|
| 1340 | x[i] = sa * x[i];
|
|---|
| 1341 | x[i+1] = sa * x[i+1];
|
|---|
| 1342 | x[i+2] = sa * x[i+2];
|
|---|
| 1343 | x[i+3] = sa * x[i+3];
|
|---|
| 1344 | x[i+4] = sa * x[i+4];
|
|---|
| 1345 | }
|
|---|
| 1346 | }
|
|---|
| 1347 | else
|
|---|
| 1348 | {
|
|---|
| 1349 | if ( 0 <= incx )
|
|---|
| 1350 | {
|
|---|
| 1351 | ix = 0;
|
|---|
| 1352 | }
|
|---|
| 1353 | else
|
|---|
| 1354 | {
|
|---|
| 1355 | ix = ( - n + 1 ) * incx;
|
|---|
| 1356 | }
|
|---|
| 1357 |
|
|---|
| 1358 | for ( i = 0; i < n; i++ )
|
|---|
| 1359 | {
|
|---|
| 1360 | x[ix] = sa * x[ix];
|
|---|
| 1361 | ix = ix + incx;
|
|---|
| 1362 | }
|
|---|
| 1363 |
|
|---|
| 1364 | }
|
|---|
| 1365 |
|
|---|
| 1366 | return;
|
|---|
| 1367 | }
|
|---|
| 1368 | /******************************************************************************/
|
|---|
| 1369 |
|
|---|
| 1370 | void sswap ( int n, float x[], int incx, float y[], int incy )
|
|---|
| 1371 |
|
|---|
| 1372 | /******************************************************************************/
|
|---|
| 1373 | /*
|
|---|
| 1374 | Purpose:
|
|---|
| 1375 |
|
|---|
| 1376 | SSWAP interchanges two float vectors.
|
|---|
| 1377 |
|
|---|
| 1378 | Modified:
|
|---|
| 1379 |
|
|---|
| 1380 | 23 February 2006
|
|---|
| 1381 |
|
|---|
| 1382 | Author:
|
|---|
| 1383 |
|
|---|
| 1384 | C version by John Burkardt
|
|---|
| 1385 |
|
|---|
| 1386 | Reference:
|
|---|
| 1387 |
|
|---|
| 1388 | Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
|
|---|
| 1389 | LINPACK User's Guide,
|
|---|
| 1390 | SIAM, 1979,
|
|---|
| 1391 | ISBN13: 978-0-898711-72-1,
|
|---|
| 1392 | LC: QA214.L56.
|
|---|
| 1393 |
|
|---|
| 1394 | Charles Lawson, Richard Hanson, David Kincaid, Fred Krogh,
|
|---|
| 1395 | Basic Linear Algebra Subprograms for Fortran Usage,
|
|---|
| 1396 | Algorithm 539,
|
|---|
| 1397 | ACM Transactions on Mathematical Software,
|
|---|
| 1398 | Volume 5, Number 3, September 1979, pages 308-323.
|
|---|
| 1399 |
|
|---|
| 1400 | Parameters:
|
|---|
| 1401 |
|
|---|
| 1402 | Input, int N, the number of entries in the vectors.
|
|---|
| 1403 |
|
|---|
| 1404 | Input/output, float X[*], one of the vectors to swap.
|
|---|
| 1405 |
|
|---|
| 1406 | Input, int INCX, the increment between successive entries of X.
|
|---|
| 1407 |
|
|---|
| 1408 | Input/output, float Y[*], one of the vectors to swap.
|
|---|
| 1409 |
|
|---|
| 1410 | Input, int INCY, the increment between successive elements of Y.
|
|---|
| 1411 | */
|
|---|
| 1412 | {
|
|---|
| 1413 | int i;
|
|---|
| 1414 | int ix;
|
|---|
| 1415 | int iy;
|
|---|
| 1416 | int m;
|
|---|
| 1417 | float temp;
|
|---|
| 1418 |
|
|---|
| 1419 | if ( n <= 0 )
|
|---|
| 1420 | {
|
|---|
| 1421 | }
|
|---|
| 1422 | else if ( incx == 1 && incy == 1 )
|
|---|
| 1423 | {
|
|---|
| 1424 | m = n % 3;
|
|---|
| 1425 |
|
|---|
| 1426 | for ( i = 0; i < m; i++ )
|
|---|
| 1427 | {
|
|---|
| 1428 | temp = x[i];
|
|---|
| 1429 | x[i] = y[i];
|
|---|
| 1430 | y[i] = temp;
|
|---|
| 1431 | }
|
|---|
| 1432 |
|
|---|
| 1433 | for ( i = m; i < n; i = i + 3 )
|
|---|
| 1434 | {
|
|---|
| 1435 | temp = x[i];
|
|---|
| 1436 | x[i] = y[i];
|
|---|
| 1437 | y[i] = temp;
|
|---|
| 1438 |
|
|---|
| 1439 | temp = x[i+1];
|
|---|
| 1440 | x[i+1] = y[i+1];
|
|---|
| 1441 | y[i+1] = temp;
|
|---|
| 1442 |
|
|---|
| 1443 | temp = x[i+2];
|
|---|
| 1444 | x[i+2] = y[i+2];
|
|---|
| 1445 | y[i+2] = temp;
|
|---|
| 1446 | }
|
|---|
| 1447 | }
|
|---|
| 1448 | else
|
|---|
| 1449 | {
|
|---|
| 1450 | if ( 0 <= incx )
|
|---|
| 1451 | {
|
|---|
| 1452 | ix = 0;
|
|---|
| 1453 | }
|
|---|
| 1454 | else
|
|---|
| 1455 | {
|
|---|
| 1456 | ix = ( - n + 1 ) * incx;
|
|---|
| 1457 | }
|
|---|
| 1458 |
|
|---|
| 1459 | if ( 0 <= incy )
|
|---|
| 1460 | {
|
|---|
| 1461 | iy = 0;
|
|---|
| 1462 | }
|
|---|
| 1463 | else
|
|---|
| 1464 | {
|
|---|
| 1465 | iy = ( - n + 1 ) * incy;
|
|---|
| 1466 | }
|
|---|
| 1467 |
|
|---|
| 1468 | for ( i = 0; i < n; i++ )
|
|---|
| 1469 | {
|
|---|
| 1470 | temp = x[ix];
|
|---|
| 1471 | x[ix] = y[iy];
|
|---|
| 1472 | y[iy] = temp;
|
|---|
| 1473 | ix = ix + incx;
|
|---|
| 1474 | iy = iy + incy;
|
|---|
| 1475 | }
|
|---|
| 1476 | }
|
|---|
| 1477 | return;
|
|---|
| 1478 | }
|
|---|
| 1479 | /******************************************************************************/
|
|---|
| 1480 |
|
|---|
| 1481 | void timestamp ( void )
|
|---|
| 1482 |
|
|---|
| 1483 | /******************************************************************************/
|
|---|
| 1484 | /*
|
|---|
| 1485 | Purpose:
|
|---|
| 1486 |
|
|---|
| 1487 | TIMESTAMP prints the current YMDHMS date as a time stamp.
|
|---|
| 1488 |
|
|---|
| 1489 | Example:
|
|---|
| 1490 |
|
|---|
| 1491 | 31 May 2001 09:45:54 AM
|
|---|
| 1492 |
|
|---|
| 1493 | Modified:
|
|---|
| 1494 |
|
|---|
| 1495 | 24 September 2003
|
|---|
| 1496 |
|
|---|
| 1497 | Author:
|
|---|
| 1498 |
|
|---|
| 1499 | John Burkardt
|
|---|
| 1500 |
|
|---|
| 1501 | Parameters:
|
|---|
| 1502 |
|
|---|
| 1503 | None
|
|---|
| 1504 | */
|
|---|
| 1505 | {
|
|---|
| 1506 | # define TIME_SIZE 40
|
|---|
| 1507 |
|
|---|
| 1508 | static char time_buffer[TIME_SIZE];
|
|---|
| 1509 | const struct tm *tm;
|
|---|
| 1510 | size_t len;
|
|---|
| 1511 | time_t now;
|
|---|
| 1512 |
|
|---|
| [e479e1e] | 1513 | //now = time ( NULL );
|
|---|
| 1514 | //tm = localtime ( &now );
|
|---|
| [5f600f6] | 1515 |
|
|---|
| [e479e1e] | 1516 | //len = strftime ( time_buffer, TIME_SIZE, "%d %B %Y %I:%M:%S %p", tm );
|
|---|
| [5f600f6] | 1517 |
|
|---|
| [e479e1e] | 1518 | //printf ( "%s\n", time_buffer );
|
|---|
| [5f600f6] | 1519 |
|
|---|
| 1520 | return;
|
|---|
| 1521 | # undef TIME_SIZE
|
|---|
| 1522 | }
|
|---|