/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.topics.tui;

import cc.mallet.pipe.iterator.DBInstanceIterator;
import cc.mallet.topics.ParallelTopicModel;
import cc.mallet.topics.TopicModelDiagnostics;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.util.logging.Logger;

/**
 * Command-line entry point that builds or restores a {@link ParallelTopicModel},
 * optionally runs sampling and topic maximization, and then writes whichever
 * reports and serialized artifacts were requested on the command line.
 *
 * <p>Every option is declared as a static {@code CommandOption} field below and is
 * populated by {@code CommandOption.process} at the start of {@link #main}.
 */
public class TopicTrainer {
    // ---- Inputs: training instances, a saved model, or a saved sampling state ----
    static CommandOption.String inputFile = new CommandOption.String(TopicTrainer.class, "input", "FILENAME", true, null, "The filename from which to read the list of training instances.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.String inputModelFilename = new CommandOption.String(TopicTrainer.class, "input-model", "FILENAME", true, null, "The filename from which to read the binary topic model. The --input option is ignored. By default this is null, indicating that no file will be read.", null);
    static CommandOption.String inputStateFilename = new CommandOption.String(TopicTrainer.class, "input-state", "FILENAME", true, null, "The filename from which to read the gzipped Gibbs sampling state created by --output-state. The original input file must be included, using --input. By default this is null, indicating that no file will be read.", null);

    // ---- Serialized outputs: model, sampling state, inferencer, evaluator ----
    static CommandOption.String outputModelFilename = new CommandOption.String(TopicTrainer.class, "output-model", "FILENAME", true, null, "The filename in which to write the binary topic model at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String stateFile = new CommandOption.String(TopicTrainer.class, "output-state", "FILENAME", true, null, "The filename in which to write the Gibbs sampling state after at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Integer outputModelInterval = new CommandOption.Integer(TopicTrainer.class, "output-model-interval", "INTEGER", true, 0, "The number of iterations between writing the model (and its Gibbs sampling state) to a binary file.  You must also set the --output-model to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.Integer outputStateInterval = new CommandOption.Integer(TopicTrainer.class, "output-state-interval", "INTEGER", true, 0, "The number of iterations between writing the sampling state to a text file.  You must also set the --output-state to use this option, whose argument will be the prefix of the filenames.", null);
    static CommandOption.String inferencerFilename = new CommandOption.String(TopicTrainer.class, "inferencer-filename", "FILENAME", true, null, "A topic inferencer applies a previously trained topic model to new documents.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String evaluatorFilename = new CommandOption.String(TopicTrainer.class, "evaluator-filename", "FILENAME", true, null, "A held-out likelihood evaluator for new documents.  By default this is null, indicating that no file will be written.", null);

    // ---- Human-readable reports ----
    static CommandOption.String topicKeysFile = new CommandOption.String(TopicTrainer.class, "output-topic-keys", "FILENAME", true, null, "The filename in which to write the top words for each topic and any Dirichlet parameters.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Integer topWords = new CommandOption.Integer(TopicTrainer.class, "num-top-words", "INTEGER", true, 20, "The number of most probable words to print for each topic after model estimation.", null);
    static CommandOption.Integer showTopicsInterval = new CommandOption.Integer(TopicTrainer.class, "show-topics-interval", "INTEGER", true, 50, "The number of iterations between printing a brief summary of the topics so far.", null);
    static CommandOption.String topicWordWeightsFile = new CommandOption.String(TopicTrainer.class, "topic-word-weights-file", "FILENAME", true, null, "The filename in which to write unnormalized weights for every topic and word type.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String wordTopicCountsFile = new CommandOption.String(TopicTrainer.class, "word-topic-counts-file", "FILENAME", true, null, "The filename in which to write a sparse representation of topic-word assignments.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String diagnosticsFile = new CommandOption.String(TopicTrainer.class, "diagnostics-file", "FILENAME", true, null, "The filename in which to write measures of topic quality, in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicReportXMLFile = new CommandOption.String(TopicTrainer.class, "xml-topic-report", "FILENAME", true, null, "The filename in which to write the top words for each topic and any Dirichlet parameters in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicPhraseReportXMLFile = new CommandOption.String(TopicTrainer.class, "xml-topic-phrase-report", "FILENAME", true, null, "The filename in which to write the top words and phrases for each topic and any Dirichlet parameters in XML format.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.String topicDocsFile = new CommandOption.String(TopicTrainer.class, "output-topic-docs", "FILENAME", true, null, "The filename in which to write the most prominent documents for each topic, at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Integer numTopDocs = new CommandOption.Integer(TopicTrainer.class, "num-top-docs", "INTEGER", true, 100, "When writing topic documents with --output-topic-docs, report this number of top documents.", null);
    static CommandOption.String docTopicsFile = new CommandOption.String(TopicTrainer.class, "output-doc-topics", "FILENAME", true, null, "The filename in which to write the topic proportions per document, at the end of the iterations.  By default this is null, indicating that no file will be written.", null);
    static CommandOption.Double docTopicsThreshold = new CommandOption.Double(TopicTrainer.class, "doc-topics-threshold", "DECIMAL", true, 0.0, "When writing topic proportions per document with --output-doc-topics, do not print topics with proportions less than this threshold value.", null);
    static CommandOption.Integer docTopicsMax = new CommandOption.Integer(TopicTrainer.class, "doc-topics-max", "INTEGER", true, -1, "When writing topic proportions per document with --output-doc-topics, do not print more than INTEGER number of topics.  A negative value indicates that all topics should be printed.", null);

    // ---- Model size, training schedule and hyperparameters ----
    static CommandOption.Integer numTopics = new CommandOption.Integer(TopicTrainer.class, "num-topics", "INTEGER", true, 10, "The number of topics to fit.", null);
    static CommandOption.Integer numThreads = new CommandOption.Integer(TopicTrainer.class, "num-threads", "INTEGER", true, 1, "The number of threads for parallel training.", null);
    static CommandOption.Integer numIterations = new CommandOption.Integer(TopicTrainer.class, "num-iterations", "INTEGER", true, 1000, "The number of iterations of Gibbs sampling.", null);
    static CommandOption.Integer numMaximizationIterations = new CommandOption.Integer(TopicTrainer.class, "num-icm-iterations", "INTEGER", true, 0, "The number of iterations of iterated conditional modes (topic maximization).", null);
    static CommandOption.Boolean noInference = new CommandOption.Boolean(TopicTrainer.class, "no-inference", "true|false", false, false, "Do not perform inference, just load a saved model and create a report. Equivalent to --num-iterations 0.", null);
    static CommandOption.Integer randomSeed = new CommandOption.Integer(TopicTrainer.class, "random-seed", "INTEGER", true, 0, "The random seed for the Gibbs sampler.  Default is 0, which will use the clock.", null);
    static CommandOption.Integer optimizeInterval = new CommandOption.Integer(TopicTrainer.class, "optimize-interval", "INTEGER", true, 0, "The number of iterations between reestimating dirichlet hyperparameters.", null);
    static CommandOption.Integer optimizeBurnIn = new CommandOption.Integer(TopicTrainer.class, "optimize-burn-in", "INTEGER", true, 200, "The number of iterations to run before first estimating dirichlet hyperparameters.", null);
    static CommandOption.Boolean useSymmetricAlpha = new CommandOption.Boolean(TopicTrainer.class, "use-symmetric-alpha", "true|false", false, false, "Optimize the concentration parameter (SumAlpha) of the prior over document-topic distributions while keeping it symmetric. This may reduce the number of very small, poorly estimated topics, but may disperse common words over several topics.", null);
    static CommandOption.Double alpha = new CommandOption.Double(TopicTrainer.class, "alpha", "DECIMAL", true, 5.0, "SumAlpha parameter: sum over topics of smoothing over doc-topic distributions. alpha_k = [this value] / [num topics]", null);
    static CommandOption.Double beta = new CommandOption.Double(TopicTrainer.class, "beta", "DECIMAL", true, 0.01, "Beta parameter: smoothing parameter for each topic-word. beta_w = [this value]", null);

    private static final Logger logger = MalletLogger.getLogger(TopicTrainer.class.getName());

    /**
     * Parses the command line, prepares the topic model, runs estimation and
     * maximization as requested, and writes all requested outputs.
     *
     * <p>The process exits with status 1 when the arguments are invalid, when an
     * interval option is given without its filename option, when the saved model or
     * the instance list cannot be loaded, or when the first instance's data is not a
     * {@link FeatureSequence}. Failures while serializing the model, inferencer or
     * evaluator are logged as warnings and do not stop the run.
     *
     * @param args command-line arguments, interpreted by {@code CommandOption.process}
     * @throws IOException if restoring the sampling state or writing a report fails
     */
    public static void main(String[] args) throws IOException {
        CommandOption.setSummary(TopicTrainer.class, "A tool for estimating, saving and printing diagnostics for topic models, such as LDA.");
        try {
            CommandOption.process(TopicTrainer.class, args);
        }
        catch (IllegalArgumentException e) {
            logger.warning("");
            logger.warning(e.getMessage());
            // Rejected arguments are a failure; report it with the same non-zero
            // status used by every other fatal path in this tool.
            System.exit(1);
        }

        // Fail before any expensive work: the help text of both interval options
        // states that the matching filename option must be set as well.
        requireFilenameForInterval(outputStateInterval.value, stateFile.value, "--output-state-interval", "--output-state");
        requireFilenameForInterval(outputModelInterval.value, outputModelFilename.value, "--output-model-interval", "--output-model");

        ParallelTopicModel topicModel = createOrRestoreModel();
        if (randomSeed.value != 0) {
            topicModel.setRandomSeed(randomSeed.value);
        }
        // Instances are added whenever --input is given, including when the model
        // itself was restored from --input-model.
        if (inputFile.value != null) {
            topicModel.addInstances(loadTrainingInstances(inputFile.value));
        }
        if (inputStateFilename.value != null) {
            logger.info("Initializing from saved state.");
            topicModel.initializeFromState(new File(inputStateFilename.value));
        }

        topicModel.setTopicDisplay(showTopicsInterval.value, topWords.value);
        topicModel.setNumIterations(numIterations.value);
        topicModel.setOptimizeInterval(optimizeInterval.value);
        topicModel.setBurninPeriod(optimizeBurnIn.value);
        topicModel.setSymmetricAlpha(useSymmetricAlpha.value);
        if (outputStateInterval.value != 0) {
            topicModel.setSaveState(outputStateInterval.value, stateFile.value);
        }
        if (outputModelInterval.value != 0) {
            topicModel.setSaveSerializedModel(outputModelInterval.value, outputModelFilename.value);
        }
        topicModel.setNumThreads(numThreads.value);

        if (!noInference.value()) {
            topicModel.estimate();
        }
        if (numMaximizationIterations.value > 0) {
            topicModel.maximize(numMaximizationIterations.value);
        }

        writeReports(topicModel);
        writeSerializedArtifacts(topicModel);
    }

    /**
     * Exits with status 1 when a periodic-save interval is set but the filename
     * option that supplies its prefix is missing.
     */
    private static void requireFilenameForInterval(int interval, String filename, String intervalOption, String filenameOption) {
        if (interval != 0 && filename == null) {
            logger.warning(intervalOption + " requires " + filenameOption + " to be set.");
            System.exit(1);
        }
    }

    /**
     * Returns the model read from {@code --input-model} when that option is set,
     * otherwise a new model built from the topic count and smoothing options.
     * Exits with status 1 if the saved model cannot be read.
     */
    private static ParallelTopicModel createOrRestoreModel() {
        if (inputModelFilename.value == null) {
            return new ParallelTopicModel(numTopics.value, alpha.value, beta.value);
        }
        try {
            return ParallelTopicModel.read(new File(inputModelFilename.value));
        }
        catch (Exception e) {
            logger.warning("Unable to restore saved topic model " + inputModelFilename.value + ": " + e);
            System.exit(1);
            return null; // not reached; required for the compiler's flow analysis
        }
    }

    /**
     * Loads the training instances named by {@code source}: a value starting with
     * {@code db:} is handed to {@link DBInstanceIterator}, anything else is loaded
     * as an instance-list file. Exits with status 1 if loading fails or if the
     * first instance does not carry a {@link FeatureSequence}.
     */
    private static InstanceList loadTrainingInstances(String source) {
        InstanceList training = null;
        try {
            training = source.startsWith("db:") ? DBInstanceIterator.getInstances(source.substring(3)) : InstanceList.load(new File(source));
        }
        catch (Exception e) {
            logger.warning("Unable to restore instance list " + source + ": " + e);
            System.exit(1);
        }
        logger.info("Data loaded.");
        // Only the first instance is inspected; an empty list is accepted as-is.
        if (training.size() > 0 && training.get(0) != null && !(((Instance)training.get(0)).getData() instanceof FeatureSequence)) {
            logger.warning("Topic modeling currently only supports feature sequences: use --keep-sequence option when importing data.");
            System.exit(1);
        }
        return training;
    }

    /**
     * Writes every text or XML report whose filename option is set. Writers are
     * opened with try-with-resources so they are closed even when a report call
     * throws.
     */
    private static void writeReports(ParallelTopicModel topicModel) throws IOException {
        if (topicKeysFile.value != null) {
            topicModel.printTopWords(new File(topicKeysFile.value), topWords.value, false);
        }
        if (diagnosticsFile.value != null) {
            try (PrintWriter out = new PrintWriter(diagnosticsFile.value)) {
                TopicModelDiagnostics diagnostics = new TopicModelDiagnostics(topicModel, topWords.value);
                out.println(diagnostics.toXML());
            }
        }
        if (topicReportXMLFile.value != null) {
            try (PrintWriter out = new PrintWriter(topicReportXMLFile.value)) {
                topicModel.topicXMLReport(out, topWords.value);
            }
        }
        if (topicPhraseReportXMLFile.value != null) {
            try (PrintWriter out = new PrintWriter(topicPhraseReportXMLFile.value)) {
                topicModel.topicPhraseXMLReport(out, topWords.value);
            }
        }
        // The final state is written here only when periodic state saving is off.
        if (stateFile.value != null && outputStateInterval.value == 0) {
            topicModel.printState(new File(stateFile.value));
        }
        if (topicDocsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(topicDocsFile.value)))) {
                topicModel.printTopicDocuments(out, numTopDocs.value);
            }
        }
        if (docTopicsFile.value != null) {
            try (PrintWriter out = new PrintWriter(new FileWriter(new File(docTopicsFile.value)))) {
                // Use the dense writer only when neither filter is active, so that
                // --doc-topics-max is honored even with the default threshold.
                boolean printAllTopics = docTopicsThreshold.value == 0.0 && docTopicsMax.value < 0;
                if (printAllTopics) {
                    topicModel.printDenseDocumentTopics(out);
                } else {
                    topicModel.printDocumentTopics(out, docTopicsThreshold.value, docTopicsMax.value);
                }
            }
        }
        if (topicWordWeightsFile.value != null) {
            topicModel.printTopicWordWeights(new File(topicWordWeightsFile.value));
        }
        if (wordTopicCountsFile.value != null) {
            topicModel.printTypeTopicCounts(new File(wordTopicCountsFile.value));
        }
    }

    /**
     * Serializes the model, its inferencer and its evaluator to the files whose
     * options are set. Each write is best-effort: a failure is logged together
     * with the exception and the remaining artifacts are still attempted.
     */
    private static void writeSerializedArtifacts(ParallelTopicModel topicModel) {
        if (outputModelFilename.value != null) {
            try (FileOutputStream fileStream = new FileOutputStream(outputModelFilename.value);
                 ObjectOutputStream oos = new ObjectOutputStream(fileStream)) {
                oos.writeObject(topicModel);
            }
            catch (Exception e) {
                logger.warning("Couldn't write topic model to filename " + outputModelFilename.value + ": " + e);
            }
        }
        if (inferencerFilename.value != null) {
            try (FileOutputStream fileStream = new FileOutputStream(inferencerFilename.value);
                 ObjectOutputStream oos = new ObjectOutputStream(fileStream)) {
                oos.writeObject(topicModel.getInferencer());
            }
            catch (Exception e) {
                // Log the exception itself: getMessage() may be null and omits the type.
                logger.warning("Couldn't create inferencer: " + e);
            }
        }
        if (evaluatorFilename.value != null) {
            try (FileOutputStream fileStream = new FileOutputStream(evaluatorFilename.value);
                 ObjectOutputStream oos = new ObjectOutputStream(fileStream)) {
                oos.writeObject(topicModel.getProbEstimator());
            }
            catch (Exception e) {
                logger.warning("Couldn't create evaluator: " + e);
            }
        }
    }
}

