I suggest you ...

Optimized DenseLU solve

/// <summary>
/// Solves A*X=B for X using LU factorization.
/// </summary>
/// <param name="columnsOfB">The number of columns of B.</param>
/// <param name="a">The square matrix A.</param>
/// <param name="order">The order of the square matrix <paramref name="a"/>.</param>
/// <param name="b">On entry the B matrix; on exit the X matrix.</param>
/// <remarks>This is equivalent to the GETRF and GETRS LAPACK routines.</remarks>
public static unsafe void LUSolveUnsafe(int columnsOfB, double[] aIn, int order, double[] bIn)
{
#region Initialize
if (aIn == null)
throw new ArgumentNullException("a");
if (bIn == null)
throw new ArgumentNullException("b");

int i;
int j;
int k;

double* a = stackalloc double[aIn.Length];
double* b = stackalloc double[bIn.Length];
int* ipiv = stackalloc int[order];

// Marshal
for (i = 0; i < aIn.Length; i++)
a[i] = aIn[i];

for (i = 0; i < bIn.Length; i++)
b[i] = bIn[i];
#endregion

#region Factorize
// Initialize the pivot matrix to the identity permutation.
for (i = 0; i < order; i++)
ipiv[i] = i;

// Outer loop
for (j = 0; j < order; j++)
{
int indexj = j * order;
int indexjj = indexj + j;

// Apply previous transformations.
for (i = 0; i < order; i++)
{
// Most of the time is spent in the following dot product.
int kmax = Math.Min(i, j);
double s = 0.0;
for (k = 0; k < kmax; k++)
s += a[(k * order) + i] * a[indexj + k];

a[indexj + i] -= s;
}

// Find pivot and exchange if necessary.
int p = j;
for (i = j + 1; i < order; i++)
if (Math.Abs(a[indexj + i]) > Math.Abs(a[indexj + p]))
p = i;

if (p != j)
{
for (k = 0; k < order; k++)
{
var temp = a[k * order + p];
a[k * order + p] = a[k * order + j];
a[k * order + j] = temp;
}
ipiv[j] = p;
}

// Compute multipliers.
//if (j < order & a[indexjj] != 0.0)
if (a[indexjj] != 0.0)
for (i = j + 1; i < order; i++)
a[indexj + i] /= a[indexjj];
}
#endregion

#region LUSolveFactored
// Compute the column vector P*B
for (i = 0; i < order; i++)
{
if (ipiv[i] == i)
continue;

int p = ipiv[i];
for (j = 0; j < columnsOfB; j++)
{
var temp = b[j * order + p];
b[j * order + p] = b[j * order + i];
b[j * order + i] = temp;
}
}

// Solve L*Y = P*B
for (k = 0; k < order; k++)
{
var korder = k * order;
for (i = k + 1; i < order; i++)
for (j = 0; j < columnsOfB; j++)
{
var index = j * order;
b[i + index] -= b[k + index] * a[i + korder];
}
}

// Solve U*X = Y;
for (k = order - 1; k >= 0; k--)
{
var korder = k + (k * order);
for (j = 0; j < columnsOfB; j++)
b[k + (j * order)] /= a[korder];

korder = k * order;
for (i = 0; i < k; i++)
for (j = 0; j < columnsOfB; j++)
b[i + j * order] -= b[k + j * order] * a[i + korder];
}
#endregion

// Marshal
for (i = 0; i < bIn.Length; i++)
bIn[i] = b[i];
}

/// <summary>
/// Solves A*X=B for X using LU factorization.
/// </summary>
/// <param name="columnsOfB">The number of columns of B.</param>
/// <param name="a">The square matrix A.</param>
/// <param name="order">The order of the square matrix <paramref name="a"/>.</param>
/// <param name="b">On entry the B matrix; on exit the X matrix.</param>
/// <remarks>This is equivalent to the GETRF and GETRS LAPACK routines.</remarks>
public static void LUSolveSafe(int columnsOfB, double[] aIn, int order, double[] bIn)
{
#region Initialize
if (aIn == null)
throw new ArgumentNullException("a");
if (bIn == null)
throw new ArgumentNullException("b");

int i;
int j;
int k;

double[] a = new double[aIn.Length];
int[] ipiv = new int[order];

Array.Copy(aIn, a, aIn.Length);
#endregion

#region Factorize
// Initialize the pivot matrix to the identity permutation.
for (i = 0; i < order; i++)
ipiv[i] = i;

// Outer loop
for (j = 0; j < order; j++)
{
int indexj = j * order;
int indexjj = indexj + j;

// Apply previous transformations.
for (i = 0; i < order; i++)
{
// Most of the time is spent in the following dot product.
int kmax = Math.Min(i, j);
double s = 0.0;
for (k = 0; k < kmax; k++)
s += a[(k * order) + i] * a[indexj + k];

a[indexj + i] -= s;
}

// Find pivot and exchange if necessary.
int p = j;
for (i = j + 1; i < order; i++)
if (Math.Abs(a[indexj + i]) > Math.Abs(a[indexj + p]))
p = i;

if (p != j)
{
for (k = 0; k < order; k++)
{
var temp = a[k * order + p];
a[k * order + p] = a[k * order + j];
a[k * order + j] = temp;
}
ipiv[j] = p;
}

// Compute multipliers.
//if (j < order & a[indexjj] != 0.0)
if (a[indexjj] != 0.0)
for (i = j + 1; i < order; i++)
a[indexj + i] /= a[indexjj];
}
#endregion

#region LUSolveFactored
// Compute the column vector P*B
for (i = 0; i < order; i++)
{
if (ipiv[i] == i)
continue;

int p = ipiv[i];
for (j = 0; j < columnsOfB; j++)
{
var temp = bIn[j * order + p];
bIn[j * order + p] = bIn[j * order + i];
bIn[j * order + i] = temp;
}
}

// Solve L*Y = P*B
for (k = 0; k < order; k++)
{
var korder = k * order;
for (i = k + 1; i < order; i++)
for (j = 0; j < columnsOfB; j++)
{
var index = j * order;
bIn[i + index] -= bIn[k + index] * a[i + korder];
}
}

// Solve U*X = Y;
for (k = order - 1; k >= 0; k--)
{
var korder = k + (k * order);
for (j = 0; j < columnsOfB; j++)
bIn[k + (j * order)] /= a[korder];

korder = k * order;
for (i = 0; i < k; i++)
for (j = 0; j < columnsOfB; j++)
bIn[i + j * order] -= bIn[k + j * order] * a[i + korder];
}
#endregion
}

6 votes
Vote
Sign in
Check!
(thinking…)
Reset
or sign in with
  • facebook
  • google
    Password icon
    Signed in as (Sign out)
    You have left! (?) (thinking…)
    Doug shared this idea  ·   ·  Flag idea as inappropriate…  ·  Admin →

    1 comment

    Sign in
    Check!
    (thinking…)
    Reset
    or sign in with
    • facebook
    • google
      Password icon
      Signed in as (Sign out)
      Submitting...
      • Doug commented  ·   ·  Flag as inappropriate

        Designed for HPC and the performance obsessed. We hit this class very hard with inner loops running billions of times by the end of a run. Tested up to A = 1000x1000 and B = 1000x8. It will run into stack out of memory issues for untested sizes.

        The stackalloc on the unsafe version is a significant heap (garbage collector) saver, and eliminating the array bounds checking in the rest of the code leads to significant performance gains.

        In a non-toy application this gives us a 20% performance boost.

      Feedback and Knowledge Base