package com.databricks.spark.sql.perf.mllib.clustering;

import com.databricks.spark.sql.perf.MLMetric;
import com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm;
import com.databricks.spark.sql.perf.mllib.MLBenchContext;
import com.databricks.spark.sql.perf.mllib.OptionImplicits$;
import com.databricks.spark.sql.perf.mllib.TestFromTraining;
import org.apache.commons.math3.random.Well19937c;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.Transformer;
import org.apache.spark.ml.linalg.Vectors$;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import scala.Function0;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashMap$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichLong;

/* compiled from: LDA.scala */
/* loaded from: input_file:com/databricks/spark/sql/perf/mllib/clustering/LDA$.class */
public final class LDA$ implements BenchmarkAlgorithm, TestFromTraining {
    // Singleton instance of this compiled Scala object; assigned by the private constructor.
    public static LDA$ MODULE$;

    static {
        // The result is deliberately discarded: the constructor stores `this` into MODULE$.
        new LDA$();
    }

    /**
     * Forwarder for {@code testDataSet}; hands the context straight to the delegated call.
     *
     * NOTE(review): in this decompiled form the call below resolves to this same method, so
     * it would never return. The compiled forwarder presumably targets the implementation
     * supplied by one of the implemented interfaces (likely TestFromTraining) — confirm
     * against the bytecode / LDA.scala before relying on this source.
     *
     * @param mLBenchContext benchmark context, passed through unchanged
     * @return the data set produced by the delegated call
     */
    @Override // com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm, com.databricks.spark.sql.perf.mllib.TestFromTraining
    public final Dataset<Row> testDataSet(MLBenchContext mLBenchContext) {
        return testDataSet(mLBenchContext);
    }

    /**
     * Forwarder for {@code score}; passes all three arguments to the delegated call.
     *
     * NOTE(review): in this decompiled form the call below resolves to this same method, so
     * it would never return. The compiled forwarder presumably targets an inherited
     * interface implementation — confirm against the bytecode / LDA.scala.
     *
     * @param mLBenchContext benchmark context, passed through unchanged
     * @param dataset        data set to score, passed through unchanged
     * @param transformer    fitted model, passed through unchanged
     * @return the metric produced by the delegated call
     * @throws Exception declared on the overridden signature
     */
    @Override // com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm, com.databricks.spark.sql.perf.mllib.ScoringWithEvaluator
    public MLMetric score(MLBenchContext mLBenchContext, Dataset<Row> dataset, Transformer transformer) throws Exception {
        return score(mLBenchContext, dataset, transformer);
    }

    /**
     * Forwarder for {@code name}.
     *
     * NOTE(review): in this decompiled form the call below resolves to this same method, so
     * it would never return. The compiled forwarder presumably targets the interface's own
     * implementation — confirm against the bytecode / LDA.scala.
     *
     * @return the name produced by the delegated call
     */
    @Override // com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm
    public String name() {
        return name();
    }

    /**
     * Forwarder for {@code testAdditionalMethods}; passes both arguments to the delegated call.
     *
     * NOTE(review): in this decompiled form the call below resolves to this same method, so
     * it would never return. The compiled forwarder presumably targets the interface's own
     * implementation — confirm against the bytecode / LDA.scala.
     *
     * @param mLBenchContext benchmark context, passed through unchanged
     * @param transformer    fitted model, passed through unchanged
     * @return the map of named thunks produced by the delegated call
     */
    @Override // com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm
    public Map<String, Function0<?>> testAdditionalMethods(MLBenchContext mLBenchContext, Transformer transformer) {
        return testAdditionalMethods(mLBenchContext, transformer);
    }

    /**
     * Builds the training DataFrame: one row per generated document, with the columns
     * "docIndex" and "features".
     *
     * Steps, as visible below:
     *  1. parallelize the long range [0, numExamples) over numPartitions partitions;
     *  2. read randomSeed, docLength and vocabSize from the context parameters;
     *  3. map every partition through $anonfun$trainingDataSet$1, which receives the seed,
     *     docLength, vocabSize, the partition index and the partition's iterator;
     *  4. convert the resulting RDD of Tuple2 into a DataFrame and name its columns.
     *
     * @param mLBenchContext benchmark context supplying the SQL context and parameters
     * @return DataFrame with columns "docIndex" and "features"
     */
    @Override // com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm, com.databricks.spark.sql.perf.mllib.TrainingSetFromTransformer
    public Dataset<Row> trainingDataSet(MLBenchContext mLBenchContext) {
        // RDD of document indices 0 until numExamples, split across numPartitions.
        RDD parallelize = mLBenchContext.sqlContext().sparkContext().parallelize(new RichLong(Predef$.MODULE$.longWrapper(0L)).until(BoxesRunTime.boxToLong(OptionImplicits$.MODULE$.oL2L(mLBenchContext.params().numExamples()))), OptionImplicits$.MODULE$.oI2I(mLBenchContext.params().numPartitions()), ClassTag$.MODULE$.Long());
        // Base random seed; combined with the partition index inside $anonfun$trainingDataSet$1.
        int oI2I = OptionImplicits$.MODULE$.oI2I(mLBenchContext.params().randomSeed());
        // docLength and vocabSize are read via .get() rather than the OptionImplicits helpers.
        // NOTE(review): presumably these are Options and .get() fails when unset — confirm.
        int unboxToInt = BoxesRunTime.unboxToInt(mLBenchContext.params().docLength().get());
        int unboxToInt2 = BoxesRunTime.unboxToInt(mLBenchContext.params().vocabSize().get());
        // obj is the boxed partition index; iterator walks that partition's document indices.
        RDD mapPartitionsWithIndex = parallelize.mapPartitionsWithIndex((obj, iterator) -> {
            return $anonfun$trainingDataSet$1(oI2I, unboxToInt, unboxToInt2, BoxesRunTime.unboxToInt(obj), iterator);
        }, parallelize.mapPartitionsWithIndex$default$2(), ClassTag$.MODULE$.apply(Tuple2.class));
        // The TypeCreator below describes the element type scala.Tuple2[scala.Long, org.apache.spark.ml.linalg.Vector].
        return mLBenchContext.sqlContext().createDataFrame(mapPartitionsWithIndex, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: com.databricks.spark.sql.perf.mllib.clustering.LDA$$typecreator1$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                // TypeRef(scala package, scala.Tuple2, [scala.Long, org.apache.spark.ml.linalg.Vector])
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple2"), new $colon.colon(mirror.staticClass("scala.Long").asType().toTypeConstructor(), new $colon.colon(mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor(), Nil$.MODULE$)));
            }
        })).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"docIndex", "features"}));
    }

    /**
     * Creates the LDA estimator for this benchmark and configures it from the context
     * parameters: k, randomSeed (as the seed), maxIter and optimizer, applied in that order.
     *
     * @param mLBenchContext benchmark context whose parameters configure the estimator
     * @return the configured LDA stage
     */
    @Override // com.databricks.spark.sql.perf.mllib.BenchmarkAlgorithm
    public PipelineStage getPipelineStage(MLBenchContext mLBenchContext) {
        // Each setter's result is carried forward, mirroring the fluent chain one step at a time.
        org.apache.spark.ml.clustering.LDA stage = new org.apache.spark.ml.clustering.LDA();
        stage = stage.setK(OptionImplicits$.MODULE$.oI2I(mLBenchContext.params().k()));
        stage = stage.setSeed(OptionImplicits$.MODULE$.oI2I(mLBenchContext.params().randomSeed()));
        stage = stage.setMaxIter(OptionImplicits$.MODULE$.oI2I(mLBenchContext.params().maxIter()));
        return stage.setOptimizer(OptionImplicits$.MODULE$.oS2S(mLBenchContext.params().optimizer()));
    }

    /**
     * Generates one synthetic document as a (document index, sparse vector) pair.
     *
     * Draws {@code i} term indices from {@code well19937c.nextInt(i2)}, counts how often
     * each index was drawn, and packs the counts into a sparse vector of size {@code i2}.
     *
     * @param i          number of draws (the caller passes docLength)
     * @param well19937c random generator shared by all documents of one partition
     * @param i2         exclusive upper bound for drawn indices and the vector size (vocabSize)
     * @param j          document index, returned boxed as the first tuple element
     * @return Tuple2 of the boxed document index and the sparse count vector
     */
    public static final /* synthetic */ Tuple2 $anonfun$trainingDataSet$2(int i, Well19937c well19937c, int i2, long j) {
        HashMap termCounts = HashMap$.MODULE$.apply(Nil$.MODULE$);
        int drawn = 0;
        while (drawn < i) {
            int termIndex = well19937c.nextInt(i2);
            // A term index not seen yet starts from a count of 0.
            int previousCount = BoxesRunTime.unboxToInt(termCounts.getOrElse(BoxesRunTime.boxToInteger(termIndex), () -> 0));
            termCounts.update(BoxesRunTime.boxToInteger(termIndex), BoxesRunTime.boxToInteger(previousCount + 1));
            drawn++;
        }
        Object docIndex = BoxesRunTime.boxToLong(j);
        // Re-wrap every (index, count) entry as an int/double pair for the sparse vector.
        Seq entries = (Seq) termCounts.toSeq().map(entry -> new Tuple2.mcID.sp(entry._1$mcI$sp(), entry._2$mcI$sp()), Seq$.MODULE$.canBuildFrom());
        return new Tuple2(docIndex, Vectors$.MODULE$.sparse(i2, entries));
    }

    /**
     * Per-partition body of the mapPartitionsWithIndex call in trainingDataSet.
     *
     * Seeds a single generator with {@code i ^ i4} and lazily maps every element of the
     * partition through $anonfun$trainingDataSet$2 using that generator.
     *
     * @param i        base random seed
     * @param i2       forwarded as the number of draws per document (docLength)
     * @param i3       forwarded as the index bound / vector size (vocabSize)
     * @param i4       partition index, XOR-ed into the seed
     * @param iterator elements of the partition (boxed long document indices)
     * @return iterator over the generated (document index, vector) tuples
     */
    public static final /* synthetic */ Iterator $anonfun$trainingDataSet$1(int i, int i2, int i3, int i4, Iterator iterator) {
        final Well19937c rng = new Well19937c(i ^ i4);
        return iterator.map(docIndex -> $anonfun$trainingDataSet$2(i2, rng, i3, BoxesRunTime.unboxToLong(docIndex)));
    }

    /**
     * Private constructor, invoked from the static initializer. Publishes the instance
     * through MODULE$ and then runs the {@code $init$} hooks of both implemented interfaces.
     */
    private LDA$() {
        // Publish first so MODULE$ is already set while the $init$ hooks run.
        MODULE$ = this;
        BenchmarkAlgorithm.$init$(this);
        TestFromTraining.$init$(this);
    }
}
