Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Mallet topic model example can not compile

I want to compile mallet in my Java (instead using the command line), so I include the jar in my project, and cite the code of the example from: http://mallet.cs.umass.edu/topics-devel.php, however, when I run this code, there is error that :

Exception in thread "main" java.lang.NoClassDefFoundError: gnu/trove/TObjectIntHashMap
    at cc.mallet.types.Alphabet.<init>(Alphabet.java:51)
    at cc.mallet.types.Alphabet.<init>(Alphabet.java:70)
    at cc.mallet.pipe.TokenSequence2FeatureSequence.<init>    (TokenSequence2FeatureSequence.java:35)
at mallet.TopicModel.main(TopicModel.java:25)
Caused by: java.lang.ClassNotFoundException: gnu.trove.TObjectIntHashMap
at java.net.URLClassLoader$1.run(Unknown Source)
at java.net.URLClassLoader$1.run(Unknown Source)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
... 4 more

I am not sure what causes the error. Could anyone help?

package mallet;

import cc.mallet.util.*;
import cc.mallet.types.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.topics.*;

import java.util.*;
import java.util.regex.*;
import java.io.*;

public class TopicModel {

public static void main(String[] args) throws Exception {

    String filePath = "D:/ap.txt";
    // Begin by importing documents from text to feature sequences
    ArrayList<Pipe> pipeList = new ArrayList<Pipe>();

    // Pipes: lowercase, tokenize, remove stopwords, map to features
    pipeList.add( new CharSequenceLowercase() );
    pipeList.add( new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")) );
    pipeList.add( new TokenSequenceRemoveStopwords(new File("stoplists/en.txt"), "UTF-8", false, false, false) );
    pipeList.add( new TokenSequence2FeatureSequence() );

    InstanceList instances = new InstanceList (new SerialPipes(pipeList));

    Reader fileReader = new InputStreamReader(new FileInputStream(new File(filePath)), "UTF-8");
    instances.addThruPipe(new CsvIterator (fileReader, Pattern.compile("^(\\S*)[\\s,]*(\\S*)[\\s,]*(.*)$"),
                                           3, 2, 1)); // data, label, name fields

    // Create a model with 100 topics, alpha_t = 0.01, beta_w = 0.01
    //  Note that the first parameter is passed as the sum over topics, while
    //  the second is 
    int numTopics = 100;
    ParallelTopicModel model = new ParallelTopicModel(numTopics, 1.0, 0.01);

    model.addInstances(instances);

    // Use two parallel samplers, which each look at one half the corpus and combine
    //  statistics after every iteration.
    model.setNumThreads(2);

    // Run the model for 50 iterations and stop (this is for testing only, 
    //  for real applications, use 1000 to 2000 iterations)
    model.setNumIterations(50);
    model.estimate();

    // Show the words and topics in the first instance

    // The data alphabet maps word IDs to strings
    Alphabet dataAlphabet = instances.getDataAlphabet();

    FeatureSequence tokens = (FeatureSequence) model.getData().get(0).instance.getData();
    LabelSequence topics = model.getData().get(0).topicSequence;

    Formatter out = new Formatter(new StringBuilder(), Locale.US);
    for (int position = 0; position < tokens.getLength(); position++) {
        out.format("%s-%d ", dataAlphabet.lookupObject(tokens.getIndexAtPosition(position)), topics.getIndexAtPosition(position));
    }
    System.out.println(out);

    // Estimate the topic distribution of the first instance, 
    //  given the current Gibbs state.
    double[] topicDistribution = model.getTopicProbabilities(0);

    // Get an array of sorted sets of word ID/count pairs
    ArrayList<TreeSet<IDSorter>> topicSortedWords = model.getSortedWords();

    // Show top 5 words in topics with proportions for the first document
    for (int topic = 0; topic < numTopics; topic++) {
        Iterator<IDSorter> iterator = topicSortedWords.get(topic).iterator();

        out = new Formatter(new StringBuilder(), Locale.US);
        out.format("%d\t%.3f\t", topic, topicDistribution[topic]);
        int rank = 0;
        while (iterator.hasNext() && rank < 5) {
            IDSorter idCountPair = iterator.next();
            out.format("%s (%.0f) ", dataAlphabet.lookupObject(idCountPair.getID()), idCountPair.getWeight());
            rank++;
        }
        System.out.println(out);
    }

    // Create a new instance with high probability of topic 0
    StringBuilder topicZeroText = new StringBuilder();
    Iterator<IDSorter> iterator = topicSortedWords.get(0).iterator();

    int rank = 0;
    while (iterator.hasNext() && rank < 5) {
        IDSorter idCountPair = iterator.next();
        topicZeroText.append(dataAlphabet.lookupObject(idCountPair.getID()) + " ");
        rank++;
    }

    // Create a new instance named "test instance" with empty target and source fields.
    InstanceList testing = new InstanceList(instances.getPipe());
    testing.addThruPipe(new Instance(topicZeroText.toString(), null, "test instance", null));

    TopicInferencer inferencer = model.getInferencer();
    double[] testProbabilities = inferencer.getSampledDistribution(testing.get(0), 10, 1, 5);
    System.out.println("0\t" + testProbabilities[0]);
}

}

like image 709
flyingmouse Avatar asked Aug 18 '14 05:08

flyingmouse


3 Answers

I solved this problem. Firstly, I tried to import trove3.1 in my Eclipse but it does not work. Then, I noticed that in Mallet folder, there is "lib" folder, so I included all jar files in my Eclipse. Bingo! It works.

like image 103
flyingmouse Avatar answered Nov 05 '22 08:11

flyingmouse


Add the mallet.jar file in your classpath. Then go to the class folder of your mallet installation before invoking

java cc.mallet.examples.TopicModel
like image 26
Debasis Avatar answered Nov 05 '22 06:11

Debasis


You have to add all libs available in Mallet folder in addition to Mallet library. Download Mallet form here

like image 21
Wesam Na Avatar answered Nov 05 '22 06:11

Wesam Na