Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to use SentiWordNet

I need to do sentiment analysis on some csv files containing tweets. I'm using SentiWordNet to do the sentiment analysis.

I got the following piece of sample java code they provided on their site. I'm not sure how to use it. The path of the csv file that I want to analyze is C:\Users\MyName\Desktop\tweets.csv . The path of the SentiWordNet_3.0.0.txt is C:\Users\MyName\Desktop\SentiWordNet_3.0.0\home\swn\www\admin\dump\SentiWordNet_3.0.0_20130122.txt . I'm new to java, pls help, thanks! The link to the sample java code below is this.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

/**
 * Sentiment-label lookup table built from a SentiWordNet 3.0 dump file.
 *
 * <p>Each data row is read as tab-separated columns: part of speech, synset id,
 * positive score, negative score, and a space-separated list of {@code term#senseRank}
 * entries (any further columns are ignored). The polarity of a synset is
 * {@code positive - negative}. For every {@code term#pos} key the per-sense polarities
 * are combined into a rank-weighted average (sense {@code k} has weight {@code 1/k})
 * and that average is mapped to a coarse label.
 */
public class SWN3 {
    /** File read by the no-argument constructor. */
    private static final String DEFAULT_PATH = "data" + File.separator + "SentiWordNet_3.0.0.txt";

    private final String pathToSWN;
    private final HashMap<String, String> _dict;

    /** Loads the lexicon from the default location {@code data/SentiWordNet_3.0.0.txt}. */
    public SWN3() {
        this(DEFAULT_PATH);
    }

    /**
     * Loads the lexicon from the given dump file.
     *
     * <p>Loading is best-effort: if the file cannot be read the stack trace is printed
     * and the lexicon stays empty, so {@link #extract} returns {@code null} for every word.
     *
     * @param pathToSWN path of the SentiWordNet dump to read
     * @throws NullPointerException if {@code pathToSWN} is {@code null}
     */
    public SWN3(String pathToSWN) {
        if (pathToSWN == null) {
            throw new NullPointerException("pathToSWN");
        }
        this.pathToSWN = pathToSWN;
        this._dict = new HashMap<String, String>();

        Map<String, List<Double>> sensesByTerm = new HashMap<String, List<Double>>();
        // try-with-resources: the reader is closed even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(this.pathToSWN))) {
            String line;
            while ((line = reader.readLine()) != null) {
                addSynset(line, sensesByTerm);
            }
            // Labels are only published after the whole file was read successfully.
            for (Map.Entry<String, List<Double>> entry : sensesByTerm.entrySet()) {
                _dict.put(entry.getKey(), label(weightedAverage(entry.getValue())));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the sentiment label of a word for one part of speech.
     *
     * @param word the term to look up
     * @param pos  the part-of-speech tag as it appears in the first column of the dump
     * @return one of {@code strong_positive}, {@code positive}, {@code weak_positive},
     *         {@code weak_negative}, {@code negative}, {@code strong_negative}, the empty
     *         string for an average of exactly zero, or {@code null} if the pair is unknown
     */
    public String extract(String word, String pos) {
        return _dict.get(word + "#" + pos);
    }

    /** Parses one line of the dump and records its polarity for every term it lists. */
    private static void addSynset(String line, Map<String, List<Double>> sensesByTerm) {
        // Header/footer lines of the dump start with '#'; they carry no scores.
        if (line.isEmpty() || line.charAt(0) == '#') {
            return;
        }
        String[] data = line.split("\t");
        if (data.length < 5) {
            return;
        }
        double score;
        try {
            score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
        } catch (NumberFormatException ignored) {
            // Not a data row; skip it instead of aborting the whole load.
            return;
        }
        for (String term : data[4].split(" ")) {
            int separator = term.lastIndexOf('#');
            if (separator <= 0) {
                continue;
            }
            int index;
            try {
                index = Integer.parseInt(term.substring(separator + 1)) - 1;
            } catch (NumberFormatException ignored) {
                continue;
            }
            if (index < 0) {
                continue;
            }
            String key = term.substring(0, separator) + "#" + data[0];
            List<Double> senses = sensesByTerm.get(key);
            if (senses == null) {
                senses = new ArrayList<Double>();
                sensesByTerm.put(key, senses);
            }
            // Pad with neutral scores, then overwrite the slot for this rank. Inserting
            // instead of overwriting would shift ranks that were already recorded.
            while (senses.size() <= index) {
                senses.add(0.0);
            }
            senses.set(index, score);
        }
    }

    /** Averages the per-sense scores with weight 1/rank, normalised by the sum of weights. */
    private static double weightedAverage(List<Double> senses) {
        double weighted = 0.0;
        double norm = 0.0;
        for (int i = 0; i < senses.size(); i++) {
            double weight = 1.0 / (i + 1);
            weighted += weight * senses.get(i);
            norm += weight;
        }
        return weighted / norm;
    }

    /**
     * Maps an average polarity to a label. The bands are contiguous and symmetric:
     * |score| >= 0.75 is "strong", |score| > 0.25 is plain, any other non-zero score is
     * "weak", and exactly zero yields the empty string.
     */
    private static String label(double score) {
        if (score >= 0.75) {
            return "strong_positive";
        }
        if (score > 0.25) {
            return "positive";
        }
        if (score > 0) {
            return "weak_positive";
        }
        if (score <= -0.75) {
            return "strong_negative";
        }
        if (score < -0.25) {
            return "negative";
        }
        if (score < 0) {
            return "weak_negative";
        }
        return "";
    }
}

New code:

/**
 * Numeric sentiment lexicon built from a SentiWordNet 3.0 dump file, plus a simple
 * tweet classifier on top of it.
 *
 * <p>Each data row is read as tab-separated columns: part of speech, synset id,
 * positive score, negative score, and a space-separated list of {@code term#senseRank}
 * entries. For every {@code term#pos} key the per-sense polarities
 * ({@code positive - negative}) are combined into a rank-weighted average
 * (sense {@code k} has weight {@code 1/k}) and stored as a {@code Double}.
 */
public class SWN3 {
    // File name follows the path given in the question text (…_20130122.txt).
    private static final String DEFAULT_PATH = "C:\\Users\\MyName\\Desktop\\SentiWordNet_3.0.0\\home\\swn\\www\\admin\\dump\\SentiWordNet_3.0.0_20130122.txt";

    /** Part-of-speech suffixes summed by {@link #extract(String)}. */
    private static final String[] POS_TAGS = {"n", "a", "r", "v"};

    private final String pathToSWN;
    private final HashMap<String, Double> _dict;

    /** Loads the lexicon from the default, hard-coded location. */
    public SWN3() {
        this(DEFAULT_PATH);
    }

    /**
     * Loads the lexicon from the given dump file.
     *
     * <p>Loading is best-effort: if the file cannot be read the stack trace is printed
     * and the lexicon stays empty, so every word scores {@code 0}.
     *
     * @param pathToSWN path of the SentiWordNet dump to read
     * @throws NullPointerException if {@code pathToSWN} is {@code null}
     */
    public SWN3(String pathToSWN) {
        if (pathToSWN == null) {
            throw new NullPointerException("pathToSWN");
        }
        this.pathToSWN = pathToSWN;
        this._dict = new HashMap<String, Double>();

        Map<String, List<Double>> sensesByTerm = new HashMap<String, List<Double>>();
        // try-with-resources: the reader is closed even when reading fails part-way.
        try (BufferedReader reader = new BufferedReader(new FileReader(this.pathToSWN))) {
            String line;
            while ((line = reader.readLine()) != null) {
                addSynset(line, sensesByTerm);
            }
            // Scores are only published after the whole file was read successfully.
            for (Map.Entry<String, List<Double>> entry : sensesByTerm.entrySet()) {
                _dict.put(entry.getKey(), weightedAverage(entry.getValue()));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Sums the stored scores of a word over the part-of-speech tags n, a, r and v.
     *
     * @param word the term to look up
     * @return the summed score; {@code 0} when the word has no entry under any tag
     */
    public Double extract(String word) {
        double total = 0.0;
        for (String pos : POS_TAGS) {
            Double score = _dict.get(word + "#" + pos);
            if (score != null) {
                total += score;
            }
        }
        return total;
    }

    /**
     * Classifies a tweet by the average score of its sentiment-bearing words.
     *
     * <p>The text is split on whitespace, every token is reduced to its ASCII letters,
     * and tokens whose score is zero (unknown or neutral words) are left out of the
     * average so they do not dilute it.
     *
     * @param twit the tweet text; {@code null} is treated like an empty tweet
     * @return "very positive" for an average >= 0.75, "positive" for any other average
     *         above zero, "very negative" for <= -0.75, "negative" for any other average
     *         below zero, and "neutral" otherwise
     */
    public String classifytweet(String twit) {
        if (twit == null) {
            return "neutral";
        }
        double totalScore = 0;
        int scoredWords = 0;
        for (String word : twit.split("\\s+")) {
            word = word.replaceAll("([^a-zA-Z\\s])", "");
            if (word.isEmpty()) {
                continue;
            }
            double wordScore = extract(word);
            if (wordScore == 0) {
                continue;
            }
            totalScore += wordScore;
            scoredWords++;
        }
        double averageScore = scoredWords == 0 ? 0 : totalScore / scoredWords;

        if (averageScore >= 0.75) {
            return "very positive";
        }
        if (averageScore > 0) {
            return "positive";
        }
        if (averageScore <= -0.75) {
            return "very negative";
        }
        if (averageScore < 0) {
            return "negative";
        }
        return "neutral";
    }

    /**
     * Classifies each command-line argument as one tweet and prints
     * {@code label<TAB>tweet}. Does nothing when no arguments are given.
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            return;
        }
        SWN3 lexicon = new SWN3();
        for (String tweet : args) {
            System.out.println(lexicon.classifytweet(tweet) + "\t" + tweet);
        }
    }

    /** Parses one line of the dump and records its polarity for every term it lists. */
    private static void addSynset(String line, Map<String, List<Double>> sensesByTerm) {
        // Header/footer lines of the dump start with '#'; they carry no scores.
        if (line.isEmpty() || line.charAt(0) == '#') {
            return;
        }
        String[] data = line.split("\t");
        if (data.length < 5) {
            return;
        }
        double score;
        try {
            score = Double.parseDouble(data[2]) - Double.parseDouble(data[3]);
        } catch (NumberFormatException ignored) {
            // Not a data row; skip it instead of aborting the whole load.
            return;
        }
        for (String term : data[4].split(" ")) {
            int separator = term.lastIndexOf('#');
            if (separator <= 0) {
                continue;
            }
            int index;
            try {
                index = Integer.parseInt(term.substring(separator + 1)) - 1;
            } catch (NumberFormatException ignored) {
                continue;
            }
            if (index < 0) {
                continue;
            }
            String key = term.substring(0, separator) + "#" + data[0];
            List<Double> senses = sensesByTerm.get(key);
            if (senses == null) {
                senses = new ArrayList<Double>();
                sensesByTerm.put(key, senses);
            }
            // Pad with neutral scores, then overwrite the slot for this rank. Inserting
            // instead of overwriting would shift ranks that were already recorded.
            while (senses.size() <= index) {
                senses.add(0.0);
            }
            senses.set(index, score);
        }
    }

    /** Averages the per-sense scores with weight 1/rank, normalised by the sum of weights. */
    private static double weightedAverage(List<Double> senses) {
        double weighted = 0.0;
        double norm = 0.0;
        for (int i = 0; i < senses.size(); i++) {
            double weight = 1.0 / (i + 1);
            weighted += weight * senses.get(i);
            norm += weight;
        }
        return weighted / norm;
    }
}
like image 787
Belgarion Avatar asked Mar 27 '13 06:03

Belgarion


People also ask

How does SentiWordNet work?

SentiWordNet is built in a two-stage approach: initially, WordNet term relationships such as synonym, antonym and hyponymy are explored to extend a core of seed words used in [19], and known a priori to carry positive or negative opinion bias.

What is SentiWordNet sentiment analysis?

Sentiment Analysis is the computational study of opinions, sentiments and emotions expressed in text. Earlier, most text information processing methods (e.g., web search, text mining) worked with factual information.


1 Answer

First of all, start by deleting all the "garbage" at the beginning of the file (the description, instructions, etc.).

One possible usage is to change SWN3 and make its extract method return a Double:

/**
 * Sums the scores stored in {@code _dict} for a word under the part-of-speech
 * suffixes {@code #n}, {@code #a}, {@code #r} and {@code #v}.
 *
 * @param word the term to look up
 * @return the summed score; {@code 0} when none of the four keys is present
 */
public Double extract(String word)
{
    // Accumulate in a primitive: avoids the deprecated Double constructor and
    // re-boxing on every addition; each key is looked up only once.
    double total = 0.0;
    for (String pos : new String[] {"n", "a", "r", "v"}) {
        Double score = _dict.get(word + "#" + pos);
        if (score != null)
            total += score;
    }
    return total;
}

Then, given a String that you want to tag, you can split it into words (stripping punctuation and other unknown characters) and, using the result that the extract method returns for each word, decide what the average weight of the String is:

// Score every whitespace-separated token of the tweet and classify the tweet by the
// average score of its sentiment-bearing words.
String[] words = twit.split("\\s+");
double totalScore = 0, averageScore;
int scoredWords = 0;
for(String word : words) {
    // Keep ASCII letters only so punctuation and digits cannot block a lookup.
    word = word.replaceAll("([^a-zA-Z\\s])", "");
    if (word.isEmpty())
        continue;
    Double wordScore = _sw.extract(word);
    // A zero score means the word is unknown or neutral; leaving it out keeps such
    // words from diluting the average.
    if (wordScore == null || wordScore == 0)
        continue;
    totalScore += wordScore;
    scoredWords++;
}
averageScore = scoredWords == 0 ? 0 : totalScore / scoredWords;

// Contiguous, symmetric bands: no score falls through a gap to "neutral".
if(averageScore>=0.75)
    return "very positive";
else if(averageScore > 0)
    return  "positive";
else if(averageScore<=-0.75)
    return "very negative";
else if(averageScore < 0)
    return "negative";
return "neutral";

I found this way easier and it works fine for me.


UPDATE:

I changed _dict to _dict = new HashMap<String, Double>(); So it will have a String key and a Double value.

So I replaced _dict.put(word, sent); with _dict.put(word, score);

like image 123
Maroun Avatar answered Oct 15 '22 10:10

Maroun