I have a X, distributed matrix, in RowMatrix form. I am using Spark 1.3.0. I need to be able to calculate X inverse.
import org.apache.spark.mllib.linalg.{Vectors,Vector,Matrix,SingularValueDecomposition,DenseMatrix,DenseVector}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
def computeInverse(X: RowMatrix): DenseMatrix = {
  val nCoef = X.numCols.toInt
  val svd = X.computeSVD(nCoef, computeU = true)
  if (svd.s.size < nCoef) {
    sys.error(s"RowMatrix.computeInverse called on singular matrix.")
  }
  // Create the inv diagonal matrix from S 
  val invS = DenseMatrix.diag(new DenseVector(svd.s.toArray.map(x => math.pow(x,-1))))
  // U cannot be a RowMatrix
  val U = new DenseMatrix(svd.U.numRows().toInt,svd.U.numCols().toInt,svd.U.rows.collect.flatMap(x => x.toArray))
  // If you could make V distributed, then this may be better. However its alreadly local...so maybe this is fine.
  val V = svd.V
  // inv(X) = V*inv(S)*transpose(U)  --- the U is already transposed.
  (V.multiply(invS)).multiply(U)
  }
I had problems using this function with option
conf.set("spark.sql.shuffle.partitions", "12")
The rows in RowMatrix got shuffled.
Here is an update that worked for me
import org.apache.spark.mllib.linalg.{DenseMatrix,DenseVector}
import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix
def computeInverse(X: IndexedRowMatrix)
: DenseMatrix = 
{
  val nCoef = X.numCols.toInt
  val svd = X.computeSVD(nCoef, computeU = true)
  if (svd.s.size < nCoef) {
    sys.error(s"IndexedRowMatrix.computeInverse called on singular matrix.")
  }
  // Create the inv diagonal matrix from S 
  val invS = DenseMatrix.diag(new DenseVector(svd.s.toArray.map(x => math.pow(x, -1))))
  // U cannot be a RowMatrix
  val U = svd.U.toBlockMatrix().toLocalMatrix().multiply(DenseMatrix.eye(svd.U.numRows().toInt)).transpose
  val V = svd.V
  (V.multiply(invS)).multiply(U)
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With