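/*
 * Web-page residue left over from the GrepCode listing this file was copied
 * from (not part of the Java source):
 *
 * Start line:
 * End line:
 * Snippet Preview
 * Snippet HTML Code
 * Stack Overflow Questions
 */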
  package net.recommenders.rival.split.splitter;
  
  
  import java.util.ArrayList;
  import java.util.Collections;
  import java.util.HashSet;
  import java.util.List;
  import java.util.Map;
  import java.util.Map.Entry;
  import java.util.Set;

Splitter that takes into account the timestamps in the data (older interactions are kept only in the training set).

Author(s):
Alejandro
 
 public class TemporalSplitter implements Splitter<LongLong> {

    
The percentage of training to be used by the splitter.
 
     private float percentageTraining;
    
The flag that indicates if the split should be done in a per user basis.
 
     private boolean perUser;
    
The flag that indicates if the split should consider all the items independently.
 
     private boolean doSplitPerItems;

    
Constructor

Parameters:
percentageTraining percentage of training data to be split
perUser flag to do the split in a per user basis
doSplitPerItems if true, every interaction between a user and an item will be kept in the test set if at least one interaction belongs to the corresponding timestamp (according to the rest of the parameters)
 
     public TemporalSplitter(float percentageTrainingboolean perUserboolean doSplitPerItems) {
         this. = percentageTraining;
         this. = perUser;
         this. = doSplitPerItems;
     }
 
    @Override
     public DataModel<LongLong>[] split(DataModel<LongLongdata) {
         final DataModel<LongLong>[] splits = new DataModel[2];
         splits[0] = new DataModel<LongLong>(); // training
         splits[1] = new DataModel<LongLong>(); // test
         if () {
             for (Long user : data.getUsers()) {
                 if (!data.getUserItemTimestamps().containsKey(user)) {
                     continue;
                 }
                 Set<LonguserTimestamps = new HashSet<Long>();
                 for (Set<Longtimestamps : data.getUserItemTimestamps().get(user).values()) {
                     userTimestamps.addAll(timestamps);
                 }
                 List<LonglistTimestamps = new ArrayList<Long>(userTimestamps);
                 Collections.sort(listTimestamps);
                 int splitPoint = Math.round( * listTimestamps.size());
                 Set<LongtestTimestamps = new HashSet<Long>();
                 int n = 0;
                 for (Long t : listTimestamps) {
                     if (n > splitPoint) {
                         testTimestamps.add(t);
                     }
                     n++;
                 }
                 if () {
                     for (Entry<LongSet<Long>> e : data.getUserItemTimestamps().get(user).entrySet()) {
                         Long item = e.getKey();
                         Double pref = data.getUserItemPreferences().get(user).get(item);
                         boolean inTest = false;
                         for (Long time : e.getValue()) {
                             if (testTimestamps.contains(time)) {
                                 inTest = true;
                                 break;
                             }
                         }
                         DataModel<LongLongdatamodel = splits[0]; // training
                         if (inTest) {
                             datamodel = splits[1]; // test
                         }
                         if (pref != null) {
                             datamodel.addPreference(useritempref);
                         }
                         for (Long time : e.getValue()) {
                             datamodel.addTimestamp(useritemtime);
                         }
                     }
                 } else {
                     if (!data.getUserItemTimestamps().containsKey(user)) {
                         continue;
                     }
                    for (Entry<LongSet<Long>> e : data.getUserItemTimestamps().get(user).entrySet()) {
                        Long item = e.getKey();
                        Double pref = data.getUserItemPreferences().get(user).get(item);
                        for (Long time : e.getValue()) {
                            DataModel<LongLongdatamodel = splits[0]; // training
                            if (testTimestamps.contains(time)) {
                                datamodel = splits[1]; // test
                            }
                            if (pref != null) {
                                datamodel.addPreference(useritempref);
                            }
                            datamodel.addTimestamp(useritemtime);
                        }
                    }
                }
            }
        } else {
            // global temporal splitting
            Set<LongallTimestamps = new HashSet<Long>();
            for (Long user : data.getUserItemTimestamps().keySet()) {
                for (Set<Longtimestamps : data.getUserItemTimestamps().get(user).values()) {
                    allTimestamps.addAll(timestamps);
                }
            }
            List<LonglistTimestamps = new ArrayList<Long>(allTimestamps);
            Collections.sort(listTimestamps);
            int splitPoint = Math.round( * listTimestamps.size());
            Set<LongtestTimestamps = new HashSet<Long>();
            int n = 0;
            for (Long t : listTimestamps) {
                if (n > splitPoint) {
                    testTimestamps.add(t);
                }
                n++;
            }
            for (Long user : data.getUsers()) {
                if (!data.getUserItemTimestamps().containsKey(user)) {
                    continue;
                }
                for (Long item : data.getUserItemPreferences().get(user).keySet()) {
                    Double pref = data.getUserItemPreferences().get(user).get(item);
                    Set<Longtime = data.getUserItemTimestamps().get(user).get(item);
                    if () {
                        boolean inTest = false;
                        for (Long t : time) {
                            if (testTimestamps.contains(t)) {
                                inTest = true;
                                break;
                            }
                        }
                        DataModel<LongLongdatamodel = splits[0]; // training
                        if (inTest) {
                            datamodel = splits[1]; // test
                        }
                        if (pref != null) {
                            datamodel.addPreference(useritempref);
                        }
                        for (Long t : time) {
                            datamodel.addTimestamp(useritemt);
                        }
                    } else {
                        for (Long t : time) {
                            DataModel<LongLongdatamodel = splits[0]; // training
                            if (testTimestamps.contains(t)) {
                                datamodel = splits[1]; // test
                            }
                            if (pref != null) {
                                datamodel.addPreference(useritempref);
                            }
                            datamodel.addTimestamp(useritemt);
                        }
                    }
                }
            }
        }
        return splits;
    }
// (web-page residue, not part of the Java source) New to GrepCode? Check out our FAQ X