Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License.  You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
 package org.apache.mahout.math;
 
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.IntBuffer;
 import java.nio.channels.FileChannel;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;

 import com.google.common.base.Function;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 
 /**
  * Provides a way to get data from a file and treat it as if it were a matrix, but avoids putting
  * all that data onto the Java heap.  Instead, the file is mapped into non-heap memory as an
  * IntBuffer and we access that instead.  The interesting aspect of this is that the values in
  * the matrix are binary and sparse so we don't need to store the actual data, just the location
  * of non-zero values.
  * <p>
  * Currently file data is formatted as follows:
  * <ul>
  * <li>A magic number to indicate the file format.</li>
  * <li>The size of the matrix (max rows and columns possible)</li>
  * <li>Number of non-zeros in each row.</li>
  * <li>A list of non-zero columns for each row.  The per-row counts are stored in the preceding
  * section, so each list holds only column numbers, in ascending order.</li>
  * </ul>
  * <p>
  * It would be preferable to use something like protobufs to define the format so that we can use
  * different row formats for different kinds of data.  For instance, Golay coding of column
  * numbers or compressed bit vectors might be good representations for some purposes.
  */

 
 public final class FileBasedSparseBinaryMatrix extends AbstractMatrix {
   private static final int MAGIC_NUMBER_V0 = 0x12d7067d;
 
   private final List<IntBufferdata = Lists.newArrayList();
   private int[] bufferIndex;
   private int[] rowOffset;
   private int[] rowSize;

  
   /**
    * Constructs an empty matrix of the given size.  No file data is attached by this constructor.
    *
    * @param rows    The number of rows in the result.
    * @param columns The number of columns in the result.
    */
 
   public FileBasedSparseBinaryMatrix(int rowsint columns) {
     super(rowscolumns);
   }
 
   public void setData(File fthrows IOException {
     List<ByteBufferbuffers = Lists.newArrayList();
     FileChannel input = new FileInputStream(f).getChannel();
 
     buffers.add(input.map(.., 0, Math.min(.f.length())));
     .add(buffers.get(0).asIntBuffer());
     Preconditions.checkArgument(buffers.get(0).getInt() == "Wrong type of file");
 
     int rows = buffers.get(0).getInt();
     int cols = buffers.get(0).getInt();
     Preconditions.checkArgument(rows == rowSize());
     Preconditions.checkArgument(cols == columnSize());
 
      = new int[rows];
      = new int[rows];
      = new int[rows];
 
     int offset = 12 + 4 * rows;
     for (int i = 0; i < rowsi++) {
       int size = buffers.get(0).getInt();
       int buffer = 0;
       while (buffer < buffers.size()) {
         if (offset + size * 4 <= buffers.get(buffer).limit()) {
           break;
         } else {
           offset -= buffers.get(buffer).capacity();
         }
      }
      if (buffer == buffers.size()) {
        buffers.add(input.map(.., 0, Math.min(.f.length() - offset)));
        .add(buffers.get(buffer).asIntBuffer());
      }
      [i] = offset / 4;
      [i] = size;
      [i] = buffer;
//      final SparseBinaryVector v = new SparseBinaryVector(buffers.get(buffer), columns, offset, size);
//      this.rows.add(v);
      offset += size * 4;
    }
  }
  public static void writeMatrix(File fMatrix mthrows IOException {
    Preconditions.checkArgument(f.canWrite(), "Can't write to output file");
    FileOutputStream fos = new FileOutputStream(f);
    // write header
    DataOutputStream out = new DataOutputStream(fos);
    out.writeInt(m.rowSize());
    out.writeInt(m.columnSize());
    // compute offsets and write row headers
    for (MatrixSlice row : m) {
      int nondefaultElements = row.vector().getNumNondefaultElements();
      out.writeInt(nondefaultElements);
    }
    // write rows
    for (MatrixSlice row : m) {
      List<Integercolumns = Lists.newArrayList(Iterables.transform(row.vector().nonZeroes(),
        new Function<Vector.ElementInteger>() {
          @Override
          public Integer apply(Vector.Element element) {
            return element.index();
          }
        }));
      Collections.sort(columns);
      for (Integer column : columns) {
        out.writeInt(column);
      }
    }
    out.close();
    fos.close();
  }

  
  /**
   * Assign the other vector values to the column of the receiver.  This implementation always
   * throws an UnsupportedOperationException.
   *
   * @param column the int column to assign
   * @param other  a Vector
   * @return the modified receiver
   * @throws CardinalityException if the cardinalities differ
   */
  public Matrix assignColumn(int columnVector other) {
    throw new UnsupportedOperationException("Default operation");
  }

  
  /**
   * Assign the other vector values to the row of the receiver.  This implementation always
   * throws an UnsupportedOperationException.
   *
   * @param row   the int row to assign
   * @param other a Vector
   * @return the modified receiver
   * @throws CardinalityException if the cardinalities differ
   */
  public Matrix assignRow(int rowVector other) {
    throw new UnsupportedOperationException("Default operation");
  }

  
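  /**
   * Return the value at the given indexes, without checking bounds.  The result is 1 if the
   * column is listed among the row's non-zero columns and 0 otherwise.
   *
   * @param rowIndex    an int row index
   * @param columnIndex an int column index
   * @return the double at the index
   */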
  public double getQuick(int rowIndexint columnIndex) {
    IntBuffer tmp = .get([rowIndex]).asReadOnlyBuffer();
    tmp.position([rowIndex]);
    tmp.limit([rowIndex]);
    tmp = tmp.slice();
    return searchForIndex(tmpcolumnIndex);
  }
  private static double searchForIndex(IntBuffer rowint columnIndex) {
    int high = row.limit();
    if (high == 0) {
      return 0;
    }
    int low = 0;
    while (high > low) {
      int mid = (low + high) / 2;
      if (row.get(mid) < columnIndex) {
        low = mid + 1;
      } else {
        high = mid;
      }
    }
    if (low >= row.limit()) {
      return 0;
    } else if (high == low && row.get(low) == columnIndex) {
      return 1;
    } else {
      return 0;
    }
  }

  
  /**
   * Return an empty matrix of the same underlying class as the receiver.  This implementation
   * always throws an UnsupportedOperationException.
   *
   * @return a Matrix
   */
  public Matrix like() {
    throw new UnsupportedOperationException("Default operation");
  }

  
  /**
   * Returns an empty matrix of the specified size.  Note that this implementation returns a
   * DenseMatrix rather than a matrix of the same underlying class as the receiver.
   *
   * @param rows    the int number of rows
   * @param columns the int number of columns
   */
  public Matrix like(int rowsint columns) {
    return new DenseMatrix(rowscolumns);
  }

  
  /**
   * Set the value at the given index, without checking bounds.  This implementation always
   * throws an UnsupportedOperationException.
   *
   * @param row    an int row index into the receiver
   * @param column an int column index into the receiver
   * @param value  a double value to set
   */
  public void setQuick(int rowint columndouble value) {
    throw new UnsupportedOperationException("Default operation");
  }

  
  /**
   * Return a view into part of a matrix.  Changes to the view will change the original matrix.
   * This implementation always throws an UnsupportedOperationException.
   *
   * @param offset an int[2] offset into the receiver
   * @param size   the int[2] size of the desired result
   * @return a matrix that shares storage with part of the original matrix.
   * @throws CardinalityException if the length is greater than the cardinality of the receiver
   * @throws IndexException       if the offset is negative or the offset+length is outside of the
   *                              receiver
   */
  public Matrix viewPart(int[] offsetint[] size) {
    throw new UnsupportedOperationException("Default operation");
  }

  
  /**
   * Returns a view of a row.  The returned vector is read-only: it rejects attempts to change
   * its values.
   *
   * @param rowIndex Which row to return.
   * @return A vector that references the desired row.
   */
  public Vector viewRow(int rowIndex) {
    IntBuffer tmp = .get([rowIndex]).asReadOnlyBuffer();
    tmp.position([rowIndex]);
    tmp.limit([rowIndex] + [rowIndex]);
    tmp = tmp.slice();
    return new SparseBinaryVector(tmpcolumnSize());
  }
  private static class SparseBinaryVector extends AbstractVector {
    private final IntBuffer buffer;
    private final int maxIndex;
    private SparseBinaryVector(IntBuffer bufferint maxIndex) {
      super(maxIndex);
      this. = buffer;
      this. = maxIndex;
    }
    SparseBinaryVector(ByteBuffer rowint maxIndexint offsetint size) {
      super(maxIndex);
      row = row.asReadOnlyBuffer();
      row.position(offset);
      row.limit(offset + size * 4);
      row = row.slice();
      this. = row.slice().asIntBuffer();
      this. = maxIndex;
    }

    
Subclasses must override to return an appropriately sparse or dense result

Parameters:
rows the row cardinality
columns the column cardinality
Returns:
a Matrix
    @Override
    protected Matrix matrixLike(int rowsint columns) {
      throw new UnsupportedOperationException("Default operation");
    }

    
Used internally by assign() to update multiple indices and values at once. Only really useful for sparse vectors (especially SequentialAccessSparseVector).

If someone ever adds a new type of sparse vectors, this method must merge (index, value) pairs into the vector.

Parameters:
updates a mapping of indices to values to merge in the vector.
    @Override
    public void mergeUpdates(OrderedIntDoubleMapping updates) {
      throw new UnsupportedOperationException("Cannot mutate SparseBinaryVector");
    }

    

Returns:
true iff this implementation should be considered dense -- that it explicitly represents every value
    @Override
    public boolean isDense() {
      return false;
    }

    

Returns:
true iff this implementation should be considered to be iterable in index order in an efficient way. In particular this implies that iterator() and iterateNonZero() return elements in ascending order by index.
    @Override
    public boolean isSequentialAccess() {
      return true;
    }

    
Iterates over all elements NOTE: Implementations may choose to reuse the Element returned for performance reasons, so if you need a copy of it, you should call AbstractVector.getElement(int) for the given index

Returns:
An java.util.Iterator over all elements
    @Override
    public Iterator<Elementiterator() {
      return new AbstractIterator<Element>() {
        int i = 0;
        @Override
        protected Element computeNext() {
          if ( < ) {
            return new Element() {
              int index = ++;
              

Returns:
the value of this vector element.
              @Override
              public double get() {
                return getQuick();
              }

              

Returns:
the index of this vector element.
              @Override
              public int index() {
                return ;
              }

              

Parameters:
value Set the current element to value.
              @Override
              public void set(double value) {
                throw new UnsupportedOperationException("Default operation");
              }
            };
          } else {
            return endOfData();
          }
        }
      };
    }

    
Iterates over all non-zero elements.

NOTE: Implementations may choose to reuse the Element returned for performance reasons, so if you need a copy of it, you should call AbstractVector.getElement(int) for the given index

Returns:
An java.util.Iterator over all non-zero elements
    @Override
    public Iterator<ElementiterateNonZero() {
      return new AbstractIterator<Element>() {
        int i = 0;
        @Override
        protected Element computeNext() {
          if ( < .limit()) {
            return new BinaryReadOnlyElement(.get(++));
          } else {
            return endOfData();
          }
        }
      };
    }

  
Return the value at the given index, without checking bounds

Parameters:
index an int index
Returns:
the double at the index
    @Override
    public double getQuick(int index) {
      return searchForIndex(index);
    }

    
Return an empty vector of the same underlying class as the receiver

Returns:
a Vector
    @Override
    public Vector like() {
      return new RandomAccessSparseVector(size());
    }

    
Copy the vector for fast operations.

Returns:
a Vector
    @Override
    protected Vector createOptimizedCopy() {
      return new RandomAccessSparseVector(size()).assign(this);
    }

    
Set the value at the given index, without checking bounds

Parameters:
index an int index into the receiver
value a double value to set
    @Override
    public void setQuick(int indexdouble value) {
      throw new UnsupportedOperationException("Read-only view");
    }

    
Set the value at the given index, without checking bounds

Parameters:
index an int index into the receiver
increment a double value to set
    @Override
    public void incrementQuick(int indexdouble increment) {
      throw new UnsupportedOperationException("Read-only view");
    }

    
Return the number of values in the recipient which are not the default value. For instance, for a sparse vector, this would be the number of non-zero values.

Returns:
an int
    @Override
    public int getNumNondefaultElements() {
      return .limit();
    }
    @Override
    public double getLookupCost() {
      return 1;
    }
    @Override
    public double getIteratorAdvanceCost() {
      return 1;
    }
    @Override
    public boolean isAddConstantTime() {
      throw new UnsupportedOperationException("Can't add binary value");
    }
  }
  public static class BinaryReadOnlyElement implements Vector.Element {
    private final int index;
    public BinaryReadOnlyElement(int index) {
      this. = index;
    }

    

Returns:
the value of this vector element.
    @Override
    public double get() {
      return 1;
    }

    

Returns:
the index of this vector element.
    @Override
    public int index() {
      return ;
    }

    

Parameters:
value Set the current element to value.
    @Override
    public void set(double value) {
      throw new UnsupportedOperationException("Can't set binary value");
    }
  }
New to GrepCode? Check out our FAQ X