Documents

Https Issues.apache.org Jira Secure Attachment 12467251 MAHOUT-572

Description
Apache
Categories
Published
of 18
All materials on our website are shared by users. If you have any questions about copyright issues, please report them to us so that we can resolve them. We are always happy to assist you.
Related Documents
Share
Transcript
  10/15/2014 https://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patchhttps://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patch 1/18 Index: core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizerTest.java===================================================================--- core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizerTest.java (Revision 0)+++ core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizerTest.java (Revision 0)@@ -0,0 +1,81 @@+/**+ * Licensed to the Apache Software Foundation (ASF) under one or more+ * contributor license agreements. See the NOTICE file distributed with+ * this work for additional information regarding copyright ownership.+ * The ASF licenses this file to You under the Apache License, Version 2.0+ * (the License ); you may not use this file except in compliance with+ * the License. You may obtain a copy of the License at+ *+ * http://www.apache.org/licenses/LICENSE-2.0+ *+ * Unless required by applicable law or agreed to in writing, software+ * distributed under the License is distributed on an AS IS BASIS,+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.+ * See the License for the specific language governing permissions and+ * limitations under the License.+ */++package org.apache.mahout.cf.taste.impl.recommender.svd;++import org.apache.mahout.cf.taste.impl.TasteTestCase;+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;+import org.apache.mahout.cf.taste.impl.common.RunningAverage;+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;+import org.apache.mahout.cf.taste.impl.model.GenericPreference;+import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;+import org.apache.mahout.cf.taste.model.DataModel;+import 
org.apache.mahout.cf.taste.model.Preference;+import org.apache.mahout.cf.taste.model.PreferenceArray;+import org.junit.Test;++import java.util.Arrays;++public class ALSWRFactorizerTest extends TasteTestCase {++ @Test+ public void toyExample() throws Exception {++ FastByIDMap<PreferenceArray> userData = new FastByIDMap<PreferenceArray>();++ userData.put(1l, new GenericUserPreferenceArray(Arrays.asList(new Preference[] {+ new GenericPreference(1l, 1l, 5f),+ new GenericPreference(1l, 2l, 5f),+ new GenericPreference(1l, 3l, 2f) })));++ userData.put(2l, new GenericUserPreferenceArray(Arrays.asList(new Preference[] {+ new GenericPreference(2l, 1l, 2f),+ new GenericPreference(2l, 3l, 3f),+ new GenericPreference(2l, 4l, 5f) })));++ userData.put(3l, new GenericUserPreferenceArray(Arrays.asList(new Preference[] {+ new GenericPreference(3l, 2l, 5f),+ new GenericPreference(3l, 4l, 3f) })));  10/15/2014 https://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patchhttps://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patch 2/18 ++ userData.put(4l, new GenericUserPreferenceArray(Arrays.asList(new Preference[] {+ new GenericPreference(4l, 1l, 3f),+ new GenericPreference(4l, 4l, 5f) })));++ DataModel dataModel = new GenericDataModel(userData);++ Factorizer factorizer = new ALSWRFactorizer(dataModel, 10, 0.065, 25);++ SVDRecommender svdRecommender = new SVDRecommender(dataModel, factorizer);++ RunningAverage avg = new FullRunningAverage();+ LongPrimitiveIterator userIDs = dataModel.getUserIDs();+ while (userIDs.hasNext()) {+ long userID = userIDs.nextLong();+ for (Preference pref : dataModel.getPreferencesFromUser(userID)) {+ double rating = pref.getValue();+ double estimate = svdRecommender.estimatePreference(userID, pref.getItemID());+ double err = rating - estimate;+ avg.addDatum(err * err);+ }+ }++ double rmse = Math.sqrt(avg.getAverage());+ assertTrue(rmse < 0.2d);+ }+}Index: 
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java===================================================================--- core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java (Revision 0)+++ core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java (Revision 0)@@ -0,0 +1,172 @@+/**+ * Licensed to the Apache Software Foundation (ASF) under one or more+ * contributor license agreements. See the NOTICE file distributed with+ * this work for additional information regarding copyright ownership.+ * The ASF licenses this file to You under the Apache License, Version 2.0+ * (the License ); you may not use this file except in compliance with+ * the License. You may obtain a copy of the License at+ *+ * http://www.apache.org/licenses/LICENSE-2.0+ *+ * Unless required by applicable law or agreed to in writing, software+ * distributed under the License is distributed on an AS IS BASIS,+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.+ * See the License for the specific language governing permissions and+ * limitations under the License.+ */++package org.apache.mahout.cf.taste.impl.recommender.svd;++import org.apache.mahout.cf.taste.common.TasteException;+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;+import org.apache.mahout.cf.taste.impl.common.RunningAverage;+import org.apache.mahout.cf.taste.model.DataModel;+import org.apache.mahout.cf.taste.model.Preference;+import org.apache.mahout.cf.taste.model.PreferenceArray;+import org.apache.mahout.common.RandomUtils;+import org.apache.mahout.math.DenseVector;+import org.apache.mahout.math.Vector;  10/15/2014 https://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patchhttps://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patch 3/18 
+import org.apache.mahout.math.als.AlternateLeastSquaresSolver;+import org.slf4j.Logger;+import org.slf4j.LoggerFactory;++import java.util.ArrayList;+import java.util.List;+import java.util.Random;++/**+ * factorizes the rating matrix using Alternating-Least-Squares with Weighted-λ-Regularization as described in+ * the paper Large-scale Collaborative Filtering for the Netflix Prize available at+ * {@see http://www.hpl.hp.com/personal/Robert_Schreiber/papers/2008%20AAIM%20Netflix/netflix_aaim08(submitted).pdf}+ */+public class ALSWRFactorizer implements Factorizer {++ private final DataModel dataModel;++ private final FastByIDMap<Integer> userIDMapping;+ private final FastByIDMap<Integer> itemIDMapping;++ /** number of features used to compute this factorization */+ private final int numFeatures;+ /** parameter to control the regularization */+ private final double lambda;+ /** number of iterations */+ private final int numIterations;++ private static final Logger log = LoggerFactory.getLogger(ALSWRFactorizer.class);++ public ALSWRFactorizer(DataModel dataModel, int numFeatures, double lambda, int numIterations) throws TasteException {+ this.dataModel = dataModel;+ this.numFeatures = numFeatures;+ this.lambda = lambda;+ this.numIterations = numIterations;++ userIDMapping = createIDMapping(dataModel.getNumUsers(), dataModel.getUserIDs());+ itemIDMapping = createIDMapping(dataModel.getNumItems(), dataModel.getItemIDs());+ }++ @Override+ public Factorization factorize() throws TasteException {++ log.info( starting to compute the factorization... 
);+ AlternateLeastSquaresSolver solver = new AlternateLeastSquaresSolver();++ double[][] M = initializeM();+ double[][] Mnext = null;+ double[][] U = null;++ for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {+ log.info( iteration {} , currentIteration);+ if (currentIteration > 0) {+ M = Mnext;+ }++ /* fix M - compute U */+ U = new double[dataModel.getNumUsers()][numFeatures];++ LongPrimitiveIterator userIDsIterator = dataModel.getUserIDs();+ while (userIDsIterator.hasNext()) {+ long userID = userIDsIterator.nextLong();  10/15/2014 https://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patchhttps://issues.apache.org/jira/secure/attachment/12467251/MAHOUT-572.patch 4/18 + List<Vector> featureVectors = new ArrayList<Vector>();+ LongPrimitiveIterator itemIDsFromUser = dataModel.getItemIDsFromUser(userID).iterator();+ while (itemIDsFromUser.hasNext()) {+ long itemID = itemIDsFromUser.nextLong();+ featureVectors.add(new DenseVector(M[itemIDMapping.get(itemID)]));+ }+ Vector userFeatures = solver.solve(featureVectors, ratingVector(dataModel.getPreferencesFromUser(userID)),+ lambda, numFeatures);+ setFeatureColumn(U, userIDMapping.get(userID), userFeatures);+ }++ /* fix U - compute M */+ Mnext = new double[dataModel.getNumItems()][numFeatures];++ LongPrimitiveIterator itemIDsIterator = dataModel.getItemIDs();+ while (itemIDsIterator.hasNext()) {+ long itemID = itemIDsIterator.nextLong();+ List<Vector> featureVectors = new ArrayList<Vector>();+ for (Preference pref : dataModel.getPreferencesForItem(itemID)) {+ long userID = pref.getUserID();+ featureVectors.add(new DenseVector(U[userIDMapping.get(userID)]));+ }+ Vector itemFeatures = solver.solve(featureVectors, ratingVector(dataModel.getPreferencesForItem(itemID)),+ lambda, numFeatures);+ setFeatureColumn(Mnext, itemIDMapping.get(itemID), itemFeatures);+ }+ }++ log.info( finished computation of the factorization... 
);+ return new Factorization(userIDMapping, itemIDMapping, U, Mnext);+ }++ double[][] initializeM() throws TasteException {+ Random random = RandomUtils.getRandom();+ double[][] M = new double[dataModel.getNumItems()][numFeatures];++ LongPrimitiveIterator itemIDsIterator = dataModel.getItemIDs();+ while (itemIDsIterator.hasNext()) {+ long itemID = itemIDsIterator.nextLong();+ int itemIDIndex = itemIDMapping.get(itemID);+ M[itemIDIndex][0] = averateRating(itemID);+ for (int n = 1; n < numFeatures; n++) {+ M[itemIDIndex][n] = random.nextDouble() * 0.1;+ }+ }+ return M;+ }++ void setFeatureColumn(double[][] matrix, int idIndex, Vector vector) {+ for (int feature = 0; feature < numFeatures; feature++) {+ matrix[idIndex][feature] = vector.get(feature);+ }+ }++ Vector ratingVector(PreferenceArray prefs) {+ double[] ratings = new double[prefs.length()];+ for (int n = 0; n < prefs.length(); n++) {+ ratings[n] = prefs.get(n).getValue();+ }+ return new DenseVector(ratings);+ }+
Search
Tags
Related Search
We Need Your Support
Thank you for visiting our website and for your interest in our free products and services. We are a nonprofit website for sharing and downloading documents. To keep this website running, we need your support.

Thanks to everyone for your continued support.

No, Thanks