umf_blas3_update.c 4.17 KB
/* ========================================================================== */
/* === UMF_blas3_update ===================================================== */
/* ========================================================================== */

/* -------------------------------------------------------------------------- */
/* UMFPACK Copyright (c) Timothy A. Davis, CISE,                              */
/* Univ. of Florida.  All Rights Reserved.  See ../Doc/License for License.   */
/* web: http://www.cise.ufl.edu/research/sparse/umfpack                       */
/* -------------------------------------------------------------------------- */

#include "umf_internal.h"

GLOBAL void UMF_blas3_update
(
    WorkType *Work
)
{
    /* ---------------------------------------------------------------------- */
    /* local variables */
    /* ---------------------------------------------------------------------- */

    Entry *L, *U, *C, *LU ;
    Int i, j, s, k, m, n, d, nb, dc ;
    
#ifndef NBLAS
    Int blas_ok = TRUE ;
#else
#define blas_ok FALSE
#endif

    DEBUG5 (("In UMF_blas3_update "ID" "ID" "ID"\n",
	Work->fnpiv, Work->fnrows, Work->fncols)) ;

    k = Work->fnpiv ;
    if (k == 0)
    {
	/* no work to do */
	return ;
    }

    m = Work->fnrows ;
    n = Work->fncols ;

    d = Work->fnr_curr ;
    dc = Work->fnc_curr ;
    nb = Work->nb ;
    ASSERT (d >= 0 && (d % 2) == 1) ;
    C = Work->Fcblock ;	    /* ldc is fnr_curr */
    L =	Work->Flblock ;	    /* ldl is fnr_curr */
    U = Work->Fublock ;	    /* ldu is fnc_curr, stored by rows */
    LU = Work->Flublock ;   /* nb-by-nb */

#ifndef NDEBUG
    DEBUG5 (("DO RANK-NB UPDATE of frontal:\n")) ;
    DEBUG5 (("DGEMM : "ID" "ID" "ID"\n", k, m, n)) ;
    DEBUG7 (("C  block: ")) ; UMF_dump_dense (C,  d, m, n) ;
    DEBUG7 (("A  block: ")) ; UMF_dump_dense (L,  d, m, k) ;
    DEBUG7 (("B' block: ")) ; UMF_dump_dense (U, dc, n, k) ;
    DEBUG7 (("LU block: ")) ; UMF_dump_dense (LU, nb, k, k) ;
#endif

    if (k == 1)
    {

#ifndef NBLAS
	BLAS_GER (m, n, L, U, C, d) ;
#endif

	if (!blas_ok)
	{
	    /* rank-1 outer product to update the C block */
	    for (j = 0 ; j < n ; j++)
	    {
		Entry u_j = U [j] ;
		if (IS_NONZERO (u_j))
		{
		    Entry *c_ij, *l_is ;
		    c_ij = & C [j*d] ;
		    l_is = & L [0] ;
#pragma ivdep
		    for (i = 0 ; i < m ; i++)
		    {
			/* C [i+j*d]-= L [i] * U [j] */
			MULT_SUB (*c_ij, *l_is, u_j) ;
			c_ij++ ;
			l_is++ ;
		    }
		}
	    }
	}

    }
    else
    {

	/* triangular solve to update the U block */

#ifndef NBLAS
	BLAS_TRSM_RIGHT (n, k, LU, nb, U, dc) ;
#endif

	if (!blas_ok)
	{
	    /* use plain C code if no BLAS at compile time, or if integer
	     * overflow has occurred */
	    for (s = 0 ; s < k ; s++)
	    {
		for (i = s+1 ; i < k ; i++)
		{
		    Entry l_is = LU [i+s*nb] ;
		    if (IS_NONZERO (l_is))
		    {
			Entry *u_ij, *u_sj ;
			u_ij = & U [i*dc] ;
			u_sj = & U [s*dc] ;
#pragma ivdep
			for (j = 0 ; j < n ; j++)
			{
			    /* U [i*dc+j] -= LU [i+s*nb] * U [s*dc+j] ; */
			    MULT_SUB (*u_ij, l_is, *u_sj) ;
			    u_ij++ ;
			    u_sj++ ;
			}
		    }
		}
	    }
	}

	/* rank-k outer product to update the C block */
	/* C = C - L*U' (U is stored by rows, not columns) */

#ifndef NBLAS
	BLAS_GEMM (m, n, k, L, U, dc, C, d) ;
#endif

	if (!blas_ok)
	{
	    /* use plain C code if no BLAS at compile time, or if integer
	     * overflow has occurred */

	    for (s = 0 ; s < k ; s++)
	    {
		for (j = 0 ; j < n ; j++)
		{
		    Entry u_sj = U [j+s*dc] ;
		    if (IS_NONZERO (u_sj))
		    {
			Entry *c_ij, *l_is ;
			c_ij = & C [j*d] ;
			l_is = & L [s*d] ;
#pragma ivdep
			for (i = 0 ; i < m ; i++)
			{
			    /* C [i+j*d]-= L [i+s*d] * U [s*dc+j] */
			    MULT_SUB (*c_ij, *l_is, u_sj) ;
			    c_ij++ ;
			    l_is++ ;
			}
		    }
		}
	    }
	}
    }

#ifndef NDEBUG
    DEBUG5 (("RANK-NB UPDATE of frontal done:\n")) ;
    DEBUG5 (("DGEMM : "ID" "ID" "ID"\n", k, m, n)) ;
    DEBUG7 (("C  block: ")) ; UMF_dump_dense (C,  d, m, n) ;
    DEBUG7 (("A  block: ")) ; UMF_dump_dense (L,  d, m, k) ;
    DEBUG7 (("B' block: ")) ; UMF_dump_dense (U, dc, n, k) ;
    DEBUG7 (("LU block: ")) ; UMF_dump_dense (LU, nb, k, k) ;
#endif

    DEBUG2 (("blas3 "ID" "ID" "ID"\n", k, Work->fnrows, Work->fncols)) ;
}