Blame view

fvn_sparse/UMFPACK/Source/umf_start_front.c 8.56 KB
422234dc3   daniau   git-svn-id: https...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
  /* ========================================================================== */
  /* === UMF_start_front ====================================================== */
  /* ========================================================================== */
  
  /* -------------------------------------------------------------------------- */
  /* UMFPACK Copyright (c) Timothy A. Davis, CISE,                              */
  /* Univ. of Florida.  All Rights Reserved.  See ../Doc/License for License.   */
  /* web: http://www.cise.ufl.edu/research/sparse/umfpack                       */
  /* -------------------------------------------------------------------------- */
  
  /* Allocate the initial frontal matrix working array for a single chain.  The
   * front does not have to be big enough, since if it's too small it will get
   * reallocated.  The size computed here is just an estimate. */
  
  #include "umf_internal.h"
  #include "umf_grow_front.h"
  
  GLOBAL Int UMF_start_front    /* returns TRUE if successful, FALSE otherwise */
  (
      Int chain,
      NumericType *Numeric,
      WorkType *Work,
      SymbolicType *Symbolic
  )
  {
      double maxbytes ;
      Int fnrows_max, fncols_max, fnr2, fnc2, fsize, fcurr_size, maxfrsize,
  	overflow, nb, f, cdeg ;
  
      nb = Symbolic->nb ;
      fnrows_max = Symbolic->Chain_maxrows [chain] ;
      fncols_max = Symbolic->Chain_maxcols [chain] ;
  
      DEBUGm2 (("Start Front for chain "ID".  fnrows_max "ID" fncols_max "ID"
  ",
  	chain, fnrows_max, fncols_max)) ;
  
      Work->fnrows_max = fnrows_max ;
      Work->fncols_max = fncols_max ;
      Work->any_skip = FALSE ;
  
      maxbytes = sizeof (Entry) *
  	(double) (fnrows_max + nb) * (double) (fncols_max + nb) ;
      fcurr_size = Work->fcurr_size ;
  
      if (Symbolic->prefer_diagonal)
      {
  	/* Get a rough upper bound on the degree of the first pivot column in
  	 * this front.  Note that Col_degree is not maintained if diagonal
  	 * pivoting is preferred.  For most matrices, the first pivot column
  	 * of the first frontal matrix of a new chain has only one tuple in
  	 * it anyway, so this bound is exact in that case. */
  	Int col, tpi, e, *E, *Col_tuples, *Col_tlen, *Cols ;
  	Tuple *tp, *tpend ;
  	Unit *Memory, *p ;
  	Element *ep ;
  	E = Work->E ;
  	Memory = Numeric->Memory ;
  	Col_tuples = Numeric->Lip ;
  	Col_tlen = Numeric->Lilen ;
  	col = Work->nextcand ;
  	tpi = Col_tuples [col] ;
  	tp = (Tuple *) Memory + tpi ;
  	tpend = tp + Col_tlen [col] ;
  	cdeg = 0 ;
  	DEBUGm3 (("
  =============== start front: col "ID" tlen "ID"
  ",
  		col, Col_tlen [col])) ;
  	for ( ; tp < tpend ; tp++)
  	{
  	    DEBUG1 (("Tuple ("ID","ID")
  ", tp->e, tp->f)) ;
  	    e = tp->e ;
  	    if (!E [e]) continue ;
  	    f = tp->f ;
  	    p = Memory + E [e] ;
  	    ep = (Element *) p ;
  	    p += UNITS (Element, 1) ;
  	    Cols = (Int *) p ;
  	    if (Cols [f] == EMPTY) continue ;
  	    DEBUG1 (("  nrowsleft "ID"
  ", ep->nrowsleft)) ;
  	    cdeg += ep->nrowsleft ;
  	}
  #ifndef NDEBUG
  	DEBUGm3 (("start front cdeg: "ID" col "ID"
  ", cdeg, col)) ;
  	UMF_dump_rowcol (1, Numeric, Work, col, FALSE) ;
  #endif
  
  	/* cdeg is now the rough upper bound on the degree of the next pivot
  	 * column. */
  
  	/* If AMD was called, we know the maximum number of nonzeros in any
  	 * column of L.  Use this as an upper bound for cdeg, but add 2 to
  	 * account for a small amount of off-diagonal pivoting. */
  	if (Symbolic->amd_dmax > 0)
  	{
  	    cdeg = MIN (cdeg, Symbolic->amd_dmax) ;
  	}
  
  	/* Increase it to account for larger columns later on.
  	 * Also ensure that it's larger than zero. */
  	cdeg += 2 ;
  
  	/* cdeg cannot be larger than fnrows_max */
  	cdeg = MIN (cdeg, fnrows_max) ;
  
      }
      else
      {
  	/* don't do the above cdeg computation */
  	cdeg = 0 ;
      }
  
      DEBUGm2 (("fnrows max "ID" fncols_max "ID"
  ", fnrows_max, fncols_max)) ;
  
      /* the current frontal matrix is empty */
      ASSERT (Work->fnrows == 0 && Work->fncols == 0 && Work->fnpiv == 0) ;
  
      /* maximum row dimension is always odd, to avoid bad cache effects */
      ASSERT (fnrows_max >= 0) ;
      ASSERT (fnrows_max % 2 == 1) ;
  
      /* ----------------------------------------------------------------------
       * allocate working array for current frontal matrix:
       * minimum size: 1-by-1
       * maximum size: fnrows_max-by-fncols_max
       * desired size:
       *
       *   if Numeric->front_alloc_init >= 0:
       *
       *	    for unsymmetric matrices:
       *	    Numeric->front_alloc_init * (fnrows_max-by-fncols_max)
       *
       *	    for symmetric matrices (diagonal pivoting preference, actually):
       *	    Numeric->front_alloc_init * (fnrows_max-by-fncols_max), or
       *	    cdeg*cdeg, whichever is smaller.
       *
       *   if Numeric->front_alloc_init < 0:
       *	    allocate a front of size -Numeric->front_alloc_init.
       *
       * Allocate the whole thing if it's small (less than 2*nb^2).  Make sure the
       * leading dimension of the frontal matrix is odd.
       *
       * Also allocate the nb-by-nb LU block, the dr-by-nb L block, and the
       * nb-by-dc U block.
       * ---------------------------------------------------------------------- */
  
      /* get the maximum front size, avoiding integer overflow */
      overflow = INT_OVERFLOW (maxbytes) ;
      if (overflow)
      {
  	/* :: int overflow, max front size :: */
  	maxfrsize = Int_MAX / sizeof (Entry) ;
      }
      else
      {
  	maxfrsize = (fnrows_max + nb) * (fncols_max + nb) ;
      }
      ASSERT (!INT_OVERFLOW ((double) maxfrsize * sizeof (Entry))) ;
  
      if (Numeric->front_alloc_init < 0)
      {
  	/* allocate a front of -Numeric->front_alloc_init entries */
  	fsize = -Numeric->front_alloc_init ;
  	fsize = MAX (1, fsize) ;
      }
      else
      {
  	if (INT_OVERFLOW (Numeric->front_alloc_init * maxbytes))
  	{
  	    /* :: int overflow, requested front size :: */
  	    fsize = Int_MAX / sizeof (Entry) ;
  	}
  	else
  	{
  	    fsize = Numeric->front_alloc_init * maxfrsize ;
  	}
  
  	if (cdeg > 0)
  	{
  	    /* diagonal pivoting is in use.  cdeg was computed above */
  	    Int fsize2 ;
  
  	    /* add the L and U blocks */
  	    cdeg += nb ;
  
  	    if (INT_OVERFLOW (((double) cdeg * (double) cdeg) * sizeof (Entry)))
  	    {
  		/* :: int overflow, symmetric front size :: */
  		fsize2 = Int_MAX / sizeof (Entry) ;
  	    }
  	    else
  	    {
  		fsize2 = MAX (cdeg * cdeg, fcurr_size) ;
  	    }
  	    fsize = MIN (fsize, fsize2) ;
  	}
      }
  
      fsize = MAX (fsize, 2*nb*nb) ;
  
      /* fsize and maxfrsize are now safe from integer overflow.  They both
       * include the size of the pivot blocks. */
      ASSERT (!INT_OVERFLOW ((double) fsize * sizeof (Entry))) ;
  
      Work->fnrows_new = 0 ;
      Work->fncols_new = 0 ;
  
      /* desired size is fnr2-by-fnc2 (includes L and U blocks): */
      DEBUGm2 (("    fsize "ID"  fcurr_size "ID"
  ", fsize, fcurr_size)) ;
      DEBUGm2 (("    maxfrsize "ID"  fnr_curr "ID" fnc_curr "ID"
  ", maxfrsize,
  	Work->fnr_curr, Work->fnc_curr)) ;
  
      if (fsize >= maxfrsize && !overflow)
      {
  	/* max working array is small, allocate all of it */
  	fnr2 = fnrows_max + nb ;
  	fnc2 = fncols_max + nb ;
  	fsize = maxfrsize ;
  	DEBUGm1 (("   sufficient for ("ID"+"ID")-by-("ID"+"ID")
  ",
  	    fnrows_max, nb, fncols_max, nb)) ;
      }
      else
      {
  	/* allocate a smaller working array */
  	if (fnrows_max <= fncols_max)
  	{
  	    fnr2 = (Int) sqrt ((double) fsize) ;
  	    /* make sure fnr2 is odd */
  	    fnr2 = MAX (fnr2, 1) ;
  	    if (fnr2 % 2 == 0) fnr2++ ;
  	    fnr2 = MIN (fnr2, fnrows_max + nb) ;
  	    fnc2 = fsize / fnr2 ;
  	}
  	else
  	{
  	    fnc2 = (Int) sqrt ((double) fsize) ;
  	    fnc2 = MIN (fnc2, fncols_max + nb) ;
  	    fnr2 = fsize / fnc2 ;
  	    /* make sure fnr2 is odd */
  	    fnr2 = MAX (fnr2, 1) ;
  	    if (fnr2 % 2 == 0)
  	    {
  		fnr2++ ;
  		fnc2 = fsize / fnr2 ;
  	    }
  	}
  	DEBUGm1 (("   smaller "ID"-by-"ID"
  ", fnr2, fnc2)) ;
      }
      fnr2 = MIN (fnr2, fnrows_max + nb) ;
      fnc2 = MIN (fnc2, fncols_max + nb) ;
      ASSERT (fnr2 % 2 == 1) ;
      ASSERT (fnr2 * fnc2 <= fsize) ;
  
      fnr2 -= nb ;
      fnc2 -= nb ;
      ASSERT (fnr2 >= 0) ;
      ASSERT (fnc2 >= 0) ;
  
      if (fsize > fcurr_size)
      {
  	DEBUGm1 (("   Grow front 
  ")) ;
  	Work->do_grow = TRUE ;
  	if (!UMF_grow_front (Numeric, fnr2, fnc2, Work, -1))
  	{
  	    /* since the minimum front size is 1-by-1, it would be nearly
  	     * impossible to run out of memory here. */
  	    DEBUGm4 (("out of memory: start front
  ")) ;
  	    return (FALSE) ;
  	}
      }
      else
      {
  	/* use the existing front */
  	DEBUGm1 (("   existing front ok
  ")) ;
  	Work->fnr_curr = fnr2 ;
  	Work->fnc_curr = fnc2 ;
  	Work->Flblock  = Work->Flublock + nb * nb ;
  	Work->Fublock  = Work->Flblock  + nb * fnr2 ;
  	Work->Fcblock  = Work->Fublock  + nb * fnc2 ;
      }
      ASSERT (Work->Flblock  == Work->Flublock + Work->nb*Work->nb) ;
      ASSERT (Work->Fublock  == Work->Flblock  + Work->fnr_curr*Work->nb) ;
      ASSERT (Work->Fcblock  == Work->Fublock  + Work->nb*Work->fnc_curr) ;
      return (TRUE) ;
  }