Start line:  
End line:  

Snippet Preview

Snippet HTML Code

Stack Overflow Questions
  /*-
   * See the file LICENSE for redistribution information.
   *
   * Copyright (c) 2002, 2013 Oracle and/or its affiliates.  All rights reserved.
   *
   */
  
  package com.sleepycat.je.tree;
  
 import java.util.List;
 
Estimates the number of non-deleted BIN entries between two end points, using information returned in TrackingInfo from Tree.getParentINForChildIN. Used for estimating dup counts, e.g., for join query optimization. Accuracy is limited by the number of samples taken to compute the average number of entries at each level. Currently only two samples (at the end points) are taken, and because the tree is not balanced making the number of entries highly variable, the count can easily be off by a factor of two.
 
 public class CountEstimator {
 
     /* If exceeded, there must be a bug of some kind. */
     private static final int MAX_RETRIES_AFTER_SPLIT = 100;

    
Returns an estimate of the number of records between two end points specified by begin/end cursor positions.
 
     public static long count(DatabaseImpl dbImpl,
                              CursorImpl beginCursor,
                              boolean beginInclusive,
                              CursorImpl endCursor,
                              boolean endInclusive) {
 
         /* If the two cursors are at the same position, return 1. */
         if (beginCursor.getBIN()== endCursor.getBIN() &&
             beginCursor.getIndex() == endCursor.getIndex()) {
             return 1;
         }
 
         /* Compute estimate for cursors at different positions. */
         final CountEstimator estimator = new CountEstimator(dbImpl);
         return estimator.count(beginCursorendCursor) +
                (beginInclusive ? 1 : 0) +
                (endInclusive ? 1 : 0);
     }
 
     private final DatabaseImpl dbImpl;
     private List<TrackingInfobeginStack;
     private List<TrackingInfoendStack;
     private final List<List<TrackingInfo>> avgEntriesStacks =
         new ArrayList<List<TrackingInfo>>();
     private int levelCount;
     private int rootLevel;
     private float[] avgEntries;
 
     private CountEstimator(DatabaseImpl dbImpl) {
         this. = dbImpl;
     }
     
     private long count(CursorImpl beginCursorCursorImpl endCursor) {
         for (int numRetries = 0;; numRetries += 1) {
 
             /*
              * If we have retried too many times, give up.  This is probably
              * due to a bug of some kind, and we shouldn't loop forever.
              */
             if (numRetries > ) {
                 throw EnvironmentFailureException.unexpectedState();
             }
 
             /*
              * Set up the initial info for the computation.  Retry if a split
              * occurs.
              */
              = getParentStack(beginCursor);
             if ( == null) {
                 continue;
             }
              = getParentStack(endCursor);
             if ( == null) {
                 continue;
             }
             if (!findCommonAncestor()) {
                 continue;
             }
 
             /* Get the the average entries from the two end points.  */
             getAvgEntries(beginCursorendCursor);
 
            /*
             * Return the count.  FUTURE: Taking more samples between the two
             * end points would increase accuracy.
             */
            return countTotal();
        }
    }

    
Returns the stack of ancestor TrackingInfo for the BIN at the cursor, or null if a split occurs and the information returned would be inconsistent.
    private List<TrackingInfogetParentStack(CursorImpl cursor) {
        /*
         * Search for parent of BIN, get TrackingInfo for ancestors.  If the
         * exact parent is not found, a split occurred and null is returned.
         */
        final List<TrackingInfotrackingList = new ArrayList<TrackingInfo>();
        final BIN bin = cursor.latchBIN();
        final SearchResult result = .getTree().getParentINForChildIN
            (bintrue /*requireExactMatch*/.,
             -1 /*targetLevel*/trackingList);
        if (!result.exactParentFound) {
            /* Must have been a split. */
            return null;
        }
        /*
         * The parent was found and is now latched.  If the child BIN does not
         * match the cursor's BIN, then a split occurred and null is returned.
         */
        final long binLsn;
        try {
            if (bin != result.parent.getTarget(result.index) ||
                bin != cursor.getBIN()) {
                /* Must have been a split. */
                return null;
            }
            binLsn = result.parent.getLsn(result.index);
            bin.latch();
        } finally {
            result.parent.releaseLatch();
        }
        /*
         * The child BIN is now latched. Subtract deleted entries from BIN's
         * total entries and adjust the index accordingly.  Add TrackingInfo
         * for child BIN.
         */
        try {
            int binEntries = bin.getNEntries();
            int binIndex = cursor.getIndex();
            for (int i = bin.getNEntries() - 1; i >= 0; i -= 1) {
                if (bin.isEntryKnownDeleted(i) ||
                    bin.isEntryPendingDeleted(i)) {
                    binEntries -= 1;
                    if (i < binIndex) {
                        binIndex -= 1;
                    }
                }
            }
            final TrackingInfo info =
                new TrackingInfo(binLsnbin.getNodeId(), binEntries);
            info.setIndex(binIndex);
            trackingList.add(info);
            return trackingList;
        } finally {
            bin.releaseLatch();
        }
    }

    
Find the common ancestor node for the two end points, which we'll call the root level. If no common ancestor can be found, return false to restart the process, because a split must have occurred in between getting the two stacks for the end points.
    private boolean findCommonAncestor() {
         = .size();
        if ( != .size()) {
            /* Must have been a root split. */
            return false;
        }
         = -1;
        for (int level =  - 1; level >= 0; level -= 1) {
            if (.get(level). ==
                .get(level).) {
                 = level;
                break;
            }
        }
        if ( < 0) {
            /* Must have been a split. */
            return false;
        }
        return true;
    }

    
This method starts with a preliminary average using just the two end points.
    private void getAvgEntries(CursorImpl beginCursorCursorImpl endCursor) {
        .clear();
        if (!addAvgEntriesSample()) {
            sampleNextBIN(beginCursortrue /*moveForward*/);
        }
        if (!addAvgEntriesSample()) {
            sampleNextBIN(endCursorfalse /*moveForward*/);
        }
        computeAvgEntries();
    }

    
FUTURE: use internal skip method instead, saving a btree lookup.
    private void sampleNextBIN(CursorImpl beginOrEndCursor,
                               boolean moveForward) {
        final CursorImpl cursor = beginOrEndCursor.cloneCursor
            (true /*addCursor*/.);
        try {
            final BIN bin = cursor.latchBIN();
            if (moveForward) {
                cursor.setIndex(bin.getNEntries() - 1);
            } else {
                cursor.setIndex(0);
            }
            final OperationStatus status = cursor.getNext
                (null /*foundKey*/null /*foundData*/.,
                 moveForwardtrue /*alreadyLatched*/,
                 null /*rangeConstraint*/);
            if (status == . && cursor.getBIN() != bin) {
                final List<TrackingInfostack = getParentStack(cursor);
                if (stack != null) {
                    addAvgEntriesSample(stack);
                }
            }
        } finally {
            cursor.close();
        }
    }

    
At each level we compute the average number of entries. This will be used as a multipler to estimate the number of nodes for any IN at that level.
    private void computeAvgEntries() {
         = new float[];
        [ - 1] = 1.0F;
        if (.size() == 0) {
            return;
        }
        for (int level =  - 1; level > 0; level -= 1) {
            long totalEntries = 0;
            for (List<TrackingInfostack : ) {
                totalEntries += stack.get(level).;
            }
            final float avg = totalEntries / ((float.size());
            [level - 1] = avg * [level];
        }
    }
    private boolean addAvgEntriesSample(List<TrackingInfostack) {
        if (isFirstBIN(stack) || isLastBIN(stack)) {
            return false;
        }
        .add(stack);
        return true;
    }
    private boolean isFirstBIN(List<TrackingInfostack) {
        for (int i = 0; i < stack.size() - 1; i += 1) {
            final TrackingInfo info = stack.get(i);
            if (info.index != 0) {
                return false;
            }
        }
        return true;
    }
    private boolean isLastBIN(List<TrackingInfostack) {
        for (int i = 0; i < stack.size() - 1; i += 1) {
            final TrackingInfo info = stack.get(i);
            if (info.index != info.entries - 1) {
                return false;
            }
        }
        return true;
    }

    
Count the total for each node that is between the two end points.
    private long countTotal() {
        long total = 0;
        /* Add nodes between the end points at the root level. */
        final int rootIndex1 = .get(). + 1;
        final int rootIndex2 = .get().;
        if (rootIndex2 > rootIndex1) {
            total += Math.round((rootIndex2 - rootIndex1) *
                                []);
        }
        /* Add nodes under the end points at lower levels. */
        for (int level =  + 1; level < level += 1) {
            /* Add nodes under left end point that are to its right. */
            final int leftIndex = .get(level).;
            final int lastIndex = .get(level). - 1;
            if (lastIndex > leftIndex) {
                total += Math.round((lastIndex - leftIndex) *
                                    [level]);
            }
            /* Add nodes under right end point that are to its left. */
            final int rightIndex = .get(level).;
            final int firstIndex = 0;
            if (rightIndex > firstIndex) {
                total += Math.round((rightIndex - firstIndex) *
                                    [level]);
            }
        }
        return total;
    }
    /* For future use, if getKeyRatios is exposed in the API. */
    static class KeyRatios {
        final double less;
        final double equal;
        final double greater;
        KeyRatios(double lessdouble equaldouble greater) {
            this. = less;
            this. = equal;
            this. = greater;
        }
        @Override
        public String toString() {
            return "less: " +  +
                   " equal: " +  +
                   " greater: " + ;
        }
    }
    /*
     * For future use, if getKeyRatios is exposed in the API.  Be sure to test
     * boundary conditions when index is 0 or nEntries.
     *
     * Algorithm copied from __bam_key_range in BDB btree/bt_stat.c.
     */
    static KeyRatios getKeyRatios(List<TrackingInfoinfoByLevel,
                                  boolean exact) {
        double factor = 1.0;
        double less = 0.0;
        double greater = 0.0;
        /*
         * At each level we know that INs greater than index contain keys
         * greater than what we are looking for and those less than index are
         * less than.  The one pointed to by index may have some less, some
         * greater or even equal.  If index is equal to the number of entries,
         * then the key is out of range and everything is less.
         */
        for (final TrackingInfo info : infoByLevel) {
            if (info.index == 0) {
                greater += (factor * (info.entries - 1)) / info.entries;
            } else if (info.index == info.entries) {
                less += factor;
            } else {
                less += (factor * info.index) / info.entries;
                greater += (factor * ((info.entries - info.index) - 1)) /
                           info.entries;
            }
            /* Factor at next level down is 1/n'th the amount at this level. */
            factor /= info.entries;
            /*
            System.out.println("factor: " + factor +
                               " less: " + less +
                               " greater: " + greater);
            */
        }
        /*
         * If there was an exact match then assign the 1/n'th factor to the key
         * itself.  Otherwise that factor belongs to those greater than the
         * key, unless the key was out of range.
         */
        final double equal;
        if (exact) {
            equal = factor;
        } else {
            if (less != 1.0) {
                greater += factor;
            }
            equal = 0.0;
        }
        return new KeyRatios(lessequalgreater);
    }
New to GrepCode? Check out our FAQ X