Merge pull request #14447 from Sacha0/trimatldivspvec

andreasnoack · andreasnoack · commit 82f3ad445aaa · 2016-01-27T11:54:44.000-05:00
methods for left-division ops involving triangular matrices and sparse vectors
diff --git a/base/sparse.jl b/base/sparse.jl
@@ -7,7 +7,8 @@ using Base.Sort: Forward
 using Base.LinAlg: AbstractTriangular, PosDefException
 
 import Base: +, -, *, \, &, |, $, .+, .-, .*, ./, .\, .^, .<, .!=, ==
-import Base: A_mul_B!, Ac_mul_B, Ac_mul_B!, At_mul_B, At_mul_B!, A_ldiv_B!
+import Base: A_mul_B!, Ac_mul_B, Ac_mul_B!, At_mul_B, At_mul_B!, At_ldiv_B, Ac_ldiv_B, A_ldiv_B!
+import Base.LinAlg: At_ldiv_B!, Ac_ldiv_B!
 
 import Base: @get!, acos, acosd, acot, acotd, acsch, asech, asin, asind, asinh,
     atan, atand, atanh, broadcast!, chol, conj!, cos, cosc, cosd, cosh, cospi, cot,
diff --git a/base/sparse/sparsevector.jl b/base/sparse/sparsevector.jl
@@ -1492,3 +1492,127 @@ function _At_or_Ac_mul_B{TvA,TiA,TvX,TiX}(tfun::BinaryOp, A::SparseMatrixCSC{TvA
     end
     SparseVector(n, ynzind, ynzval)
 end
+
+# define matrix division operations involving triangular matrices and sparse vectors
+# the valid left-division operations are A[t|c]_ldiv_B[!] and \
+# the valid right-division operations are A(t|c)_rdiv_B[t|c][!]
+# see issue #14005 for discussion of these methods
+for isunittri in (true, false), islowertri in (true, false)
+    unitstr = isunittri ? "Unit" : ""
+    halfstr = islowertri ? "Lower" : "Upper"
+    tritype = :(Base.LinAlg.$(symbol(string(unitstr, halfstr, "Triangular"))))
+
+    # build out-of-place left-division operations
+    for (istrans, func, ipfunc) in (
+        (false, :(\),         :(A_ldiv_B!)),
+        (true,  :(At_ldiv_B), :(At_ldiv_B!)),
+        (true,  :(Ac_ldiv_B), :(Ac_ldiv_B!)) )
+
+        # broad method where elements are Numbers
+        @eval function ($func){TA<:Number,Tb<:Number,S}(A::$tritype{TA,S}, b::SparseVector{Tb})
+            TAb = $(isunittri ?
+                :(typeof(zero(TA)*zero(Tb) + zero(TA)*zero(Tb))) :
+                :(typeof((zero(TA)*zero(Tb) + zero(TA)*zero(Tb))/one(TA))) )
+            ($ipfunc)(convert(AbstractArray{TAb}, A), convert(Array{TAb}, b))
+        end
+
+        # faster method requiring good view support of the
+        # triangular matrix type. hence the StridedMatrix restriction.
+        @eval function ($func){TA<:Number,Tb<:Number,S<:StridedMatrix}(A::$tritype{TA,S}, b::SparseVector{Tb})
+            TAb = $(isunittri ?
+                :(typeof(zero(TA)*zero(Tb) + zero(TA)*zero(Tb))) :
+                :(typeof((zero(TA)*zero(Tb) + zero(TA)*zero(Tb))/one(TA))) )
+            r = convert(Array{TAb}, b)
+            # this operation involves only b[nzrange], so we extract
+            # and operate on solely that section for efficiency
+            nzrange = $( (islowertri && !istrans) || (!islowertri && istrans) ?
+                :(b.nzind[1]:b.n) :
+                :(1:b.nzind[end]) )
+            nzrangeviewr = sub(r, nzrange)
+            nzrangeviewA = $tritype(sub(A.data, nzrange, nzrange))
+            ($ipfunc)(convert(AbstractArray{TAb}, nzrangeviewA), nzrangeviewr)
+            r
+        end
+
+        # fallback where elements are not Numbers
+        @eval ($func){TA,Tb,S}(A::$tritype{TA,S}, b::SparseVector{Tb}) = ($ipfunc)(A, copy(b))
+    end
+
+    # build in-place left-division operations
+    for (istrans, func) in (
+        (false, :(A_ldiv_B!)),
+        (true,  :(At_ldiv_B!)),
+        (true,  :(Ac_ldiv_B!)) )
+
+        # the generic in-place left-division methods handle these cases, but
+        # we can achieve greater efficiency where the triangular matrix provides
+        # good view support. hence the StridedMatrix restriction.
+        @eval function ($func){TA,Tb,S<:StridedMatrix}(A::$tritype{TA,S}, b::SparseVector{Tb})
+            # densify the relevant part of b in one shot rather
+            # than potentially repeatedly reallocating during the solve
+            $( (islowertri && !istrans) || (!islowertri && istrans) ?
+                :(_densifyfirstnztoend!(b)) :
+                :(_densifystarttolastnz!(b)) )
+            # this operation involves only the densified section, so
+            # for efficiency we extract and operate on solely that section
+            # furthermore we operate on that section as a dense vector
+            # such that dispatch has a chance to exploit, e.g., tuned BLAS
+            nzrange = $( (islowertri && !istrans) || (!islowertri && istrans) ?
+                :(b.nzind[1]:b.n) :
+                :(1:b.nzind[end]) )
+            nzrangeviewbnz = sub(b.nzval, nzrange - b.nzind[1] + 1)
+            nzrangeviewA = $tritype(sub(A.data, nzrange, nzrange))
+            ($func)(nzrangeviewA, nzrangeviewbnz)
+            # could strip any miraculous zeros here perhaps
+            b
+        end
+    end
+end
+
+# helper functions for in-place matrix division operations defined above
+"Densifies `x::SparseVector` from its first nonzero (`x[x.nzind[1]]`) through its end (`x[x.n]`)."
+function _densifyfirstnztoend!(x::SparseVector)
+    # lengthen containers
+    oldnnz = nnz(x)
+    newnnz = x.n - x.nzind[1] + 1
+    resize!(x.nzval, newnnz)
+    resize!(x.nzind, newnnz)
+    # redistribute nonzero values over lengthened container
+    # initialize now-allocated zero values simultaneously
+    nextpos = newnnz
+    @inbounds for oldpos in oldnnz:-1:1
+        nzi = x.nzind[oldpos]
+        nzv = x.nzval[oldpos]
+        newpos = nzi - x.nzind[1] + 1
+        newpos < nextpos && (x.nzval[newpos+1:nextpos] = 0)
+        newpos == oldpos && break
+        x.nzval[newpos] = nzv
+        nextpos = newpos - 1
+    end
+    # finally update lengthened nzinds
+    x.nzind[2:end] = (x.nzind[1]+1):x.n
+    x
+end
+"Densifies `x::SparseVector` from its beginning (`x[1]`) through its last nonzero (`x[x.nzind[end]]`)."
+function _densifystarttolastnz!(x::SparseVector)
+    # lengthen containers
+    oldnnz = nnz(x)
+    newnnz = x.nzind[end]
+    resize!(x.nzval, newnnz)
+    resize!(x.nzind, newnnz)
+    # redistribute nonzero values over lengthened container
+    # initialize now-allocated zero values simultaneously
+    nextpos = newnnz
+    @inbounds for oldpos in oldnnz:-1:1
+        nzi = x.nzind[oldpos]
+        nzv = x.nzval[oldpos]
+        nzi < nextpos && (x.nzval[nzi+1:nextpos] = 0)
+        nzi == oldpos && (nextpos = 0; break)
+        x.nzval[nzi] = nzv
+        nextpos = nzi - 1
+    end
+    nextpos > 0 && (x.nzval[1:nextpos] = 0)
+    # finally update lengthened nzinds
+    x.nzind[1:newnnz] = 1:newnnz
+    x
+end
diff --git a/test/sparsedir/sparsevector.jl b/test/sparsedir/sparsevector.jl
@@ -724,6 +724,66 @@ let A = complex(sprandn(7, 8, 0.5), sprandn(7, 8, 0.5)),
     @test_approx_eq full(y) Af'x2f
 end
 
+# left-division operations involving triangular matrices and sparse vectors (#14005)
+let m = 10
+    sparsefloatvecs = SparseVector[sprand(m, 0.4) for k in 1:3]
+    sparseintvecs = SparseVector[SparseVector(m, sprvec.nzind, round(Int, sprvec.nzval*10)) for sprvec in sparsefloatvecs]
+    sparsecomplexvecs = SparseVector[SparseVector(m, sprvec.nzind, complex(sprvec.nzval, sprvec.nzval)) for sprvec in sparsefloatvecs]
+
+    sprmat = sprand(m, m, 0.2)
+    sparsefloatmat = speye(m) + sprmat/(2m)
+    sparsecomplexmat = speye(m) + SparseMatrixCSC(m, m, sprmat.colptr, sprmat.rowval, complex(sprmat.nzval, sprmat.nzval)/(4m))
+    sparseintmat = speye(Int, m)*10m + SparseMatrixCSC(m, m, sprmat.colptr, sprmat.rowval, round(Int, sprmat.nzval*10))
+
+    denseintmat = eye(Int, m)*10m + rand(1:m, m, m)
+    densefloatmat = eye(m) + randn(m, m)/(2m)
+    densecomplexmat = eye(m) + complex(randn(m, m), randn(m, m))/(4m)
+
+    inttypes = (Int32, Int64, BigInt)
+    floattypes = (Float32, Float64, BigFloat)
+    complextypes = (Complex{Float32}, Complex{Float64})
+    eltypes = (inttypes..., floattypes..., complextypes...)
+
+    for eltypemat in eltypes
+        (densemat, sparsemat) = eltypemat in inttypes ? (denseintmat, sparseintmat) :
+                                eltypemat in floattypes ? (densefloatmat, sparsefloatmat) :
+                                eltypemat in complextypes && (densecomplexmat, sparsecomplexmat)
+        densemat = convert(Matrix{eltypemat}, densemat)
+        sparsemat = convert(SparseMatrixCSC{eltypemat}, sparsemat)
+        trimats = (LowerTriangular(densemat), UpperTriangular(densemat),
+                   LowerTriangular(sparsemat), UpperTriangular(sparsemat) )
+        unittrimats = (Base.LinAlg.UnitLowerTriangular(densemat), Base.LinAlg.UnitUpperTriangular(densemat),
+                       Base.LinAlg.UnitLowerTriangular(sparsemat), Base.LinAlg.UnitUpperTriangular(sparsemat) )
+
+        for eltypevec in eltypes
+            spvecs = eltypevec in inttypes ? sparseintvecs :
+                     eltypevec in floattypes ? sparsefloatvecs :
+                     eltypevec in complextypes && sparsecomplexvecs
+            spvecs = SparseVector[SparseVector(m, spvec.nzind, convert(Vector{eltypevec}, spvec.nzval)) for spvec in spvecs]
+
+            for spvec in spvecs
+                fspvec = convert(Array, spvec)
+                # test out-of-place left-division methods
+                for mat in (trimats..., unittrimats...), func in (\, At_ldiv_B, Ac_ldiv_B)
+                    @test isapprox((func)(mat, spvec), (func)(mat, fspvec))
+                end
+                # test in-place left-division methods not involving quotients
+                if eltypevec == typeof(zero(eltypemat)*zero(eltypevec) + zero(eltypemat)*zero(eltypevec))
+                    for mat in unittrimats, func in (A_ldiv_B!, Base.LinAlg.At_ldiv_B!, Base.LinAlg.Ac_ldiv_B!)
+                        @test isapprox((func)(mat, copy(spvec)), (func)(mat, copy(fspvec)))
+                    end
+                end
+                # test in-place left-division methods involving quotients
+                if eltypevec == typeof((zero(eltypemat)*zero(eltypevec) + zero(eltypemat)*zero(eltypevec))/one(eltypemat))
+                    for mat in trimats, func in (A_ldiv_B!, Base.LinAlg.At_ldiv_B!, Base.LinAlg.Ac_ldiv_B!)
+                        @test isapprox((func)(mat, copy(spvec)), (func)(mat, copy(fspvec)))
+                    end
+                end
+            end
+        end
+    end
+end
+
 # It's tempting to share data between a SparseVector and a SparseArrays,
 # but if that's done, then modifications to one or the other will cause
 # an inconsistent state: