Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
 package org.apache.mahout.math.decomposer.lanczos;
 import java.util.Map;

Simple implementation of the Lanczos algorithm for finding eigenvalues of a symmetric matrix, applied to non-symmetric matrices by applying Matrix.timesSquared(vector) as the "matrix-multiplication" method.

See the SSVD code for a better option org.apache.mahout.math.ssvd.SequentialBigSvd See also the docs on stochastic projection SVD

To avoid floating point overflow problems which arise in power-methods like Lanczos, an initial pass is made through the input matrix to

  • generate a good starting seed vector by summing all the rows of the input matrix, and
  • compute the trace(inputMatrixt*matrix)

This latter value, being the sum of all of the singular values, is used to rescale the entire matrix, effectively forcing the largest singular value to be strictly less than one, and transforming floating point overflow problems into floating point underflow (ie, very small singular values will become invisible, as they will appear to be zero and the algorithm will terminate).

This implementation uses org.apache.mahout.math.solver.EigenDecomposition to do the eigenvalue extraction from the small (desiredRank x desiredRank) tridiagonal matrix. Numerical stability is achieved via brute-force: re-orthogonalization against all previous eigenvectors is computed after every pass. This can be made smarter if (when!) this proves to be a major bottleneck. Of course, this step can be parallelized as well.

 public class LanczosSolver {
   private static final Logger log = LoggerFactory.getLogger(LanczosSolver.class);
   public static final double SAFE_MAX = 1.0e150;
   public enum TimingSection {
   private final Map<TimingSectionLongstartTimes = new EnumMap<TimingSectionLong>(TimingSection.class);
   private final Map<TimingSectionLongtimes = new EnumMap<TimingSectionLong>(TimingSection.class);
   private static final class Scale extends DoubleFunction {
     private final double d;
     private Scale(double d) {
       this. = d;
     public double apply(double arg1) {
       return arg1 * ;
   public void solve(LanczosState state,
                     int desiredRank) {
   public void solve(LanczosState state,
                     int desiredRank,
                     boolean isSymmetric) {
    VectorIterable corpus = state.getCorpus();
    .info("Finding {} singular vectors of matrix with {} rows, via Lanczos",
    int i = state.getIterationNumber();
    Vector currentVector = state.getBasisVector(i - 1);
    Vector previousVector = state.getBasisVector(i - 2);
    double beta = 0;
    Matrix triDiag = state.getDiagonalMatrix();
    while (i < desiredRank) {
      Vector nextVector = isSymmetric ? corpus.times(currentVector) : corpus.timesSquared(currentVector);
      .info("{} passes through the corpus so far..."i);
      if (state.getScaleFactor() <= 0) {
      nextVector.assign(new Scale(1.0 / state.getScaleFactor()));
      if (previousVector != null) {
        nextVector.assign(previousVectornew PlusMult(-beta));
      // now orthogonalize
      double alpha =;
      nextVector.assign(currentVectornew PlusMult(-alpha));
      // and normalize
      beta = nextVector.norm(2);
      if (outOfRange(beta) || outOfRange(alpha)) {
        .warn("Lanczos parameters out of range: alpha = {}, beta = {}.  Bailing out early!",
      nextVector.assign(new Scale(1 / beta));
      previousVector = currentVector;
      currentVector = nextVector;
      // save the projections and norms!
      triDiag.set(i - 1, i - 1, alpha);
      if (i < desiredRank - 1) {
        triDiag.set(i - 1, ibeta);
        triDiag.set(ii - 1, beta);
    .info("Lanczos iteration complete - now to diagonalize the tri-diagonal auxiliary matrix.");
    // at this point, have tridiag all filled out, and basis is all filled out, and orthonormalized
    EigenDecomposition decomp = new EigenDecomposition(triDiag);
    Matrix eigenVects = decomp.getV();
    Vector eigenVals = decomp.getRealEigenvalues();
    for (int row = 0; row < irow++) {
      Vector realEigen = null;
      // the eigenvectors live as columns of V, in reverse order.  Weird but true.
      Vector ejCol = eigenVects.viewColumn(i - row - 1);
      int size = Math.min(ejCol.size(), state.getBasisSize());
      for (int j = 0; j < sizej++) {
        double d = ejCol.get(j);
        Vector rowJ = state.getBasisVector(j);
        if (realEigen == null) {
          realEigen =;
        realEigen.assign(rowJnew PlusMult(d));
      Preconditions.checkState(realEigen != null);
      assert realEigen != null;
      realEigen = realEigen.normalize();
      double e = eigenVals.get(row) * state.getScaleFactor();
      if (!isSymmetric) {
        e = Math.sqrt(e);
      .info("Eigenvector {} found with eigenvalue {}"rowe);
    .info("LanczosSolver finished.");
  protected static double calculateScaleFactor(Vector nextVector) {
    return nextVector.norm(2);
  private static boolean outOfRange(double d) {
    return Double.isNaN(d) || d >  || -d > ;
  protected static void orthoganalizeAgainstAllButLast(Vector nextVectorLanczosState state) {
    for (int i = 0; i < state.getIterationNumber(); i++) {
      Vector basisVector = state.getBasisVector(i);
      double alpha;
      if (basisVector == null || (alpha = == 0.0) {
      nextVector.assign(basisVectornew PlusMult(-alpha));
  private void startTime(TimingSection section) {
    .put(section, System.nanoTime());
  private void endTime(TimingSection section) {
    if (!.containsKey(section)) {
      .put(section, 0L);
    .put(section.get(section) + System.nanoTime() - .get(section));
New to GrepCode? Check out our FAQ X