| ellomoto |
Nov 26th, 2004 2:04 PM |
Hi all,
im trying to build a text classification program that reads in a text file, tokenises it and stores it in a hashmap. Along the way i would like to instantiate another class which reads a text file full of commonwords. I would then like to remove these common words from the initial hashmap. I have set up two classes one called CommonWords.java and the other called Train.java. I am getting an error which i have posted after my code.
Train.java
:
import java.io.*;
import java.util.*;
import javax.swing.JOptionPane;
public class Train {
public FileReader file;
public StreamTokenizer st;
public HashMap counts = new HashMap();
public Hashtable cword;
/** Creates a new instance of train */
public Train() {
String fileName = "SPAM.txt";
String data;
HashMap counts = new HashMap();
int tokenType = 0;
int numberOfTokens = 0;
try
{
FileReader file = new FileReader(fileName);
StreamTokenizer st = new StreamTokenizer(new BufferedReader(file));
st.ordinaryChar('!');
st.ordinaryChar('$');
st.ordinaryChar('"');
st.whitespaceChars('/','/');
st.whitespaceChars('\\','\\');
st.whitespaceChars('.','.');
st.whitespaceChars(',',',');
st.whitespaceChars(';',';');
st.whitespaceChars(':',':');
st.whitespaceChars('=','=');
st.whitespaceChars('\'','\'');
st.whitespaceChars('`','`');
st.whitespaceChars('[',']');
while(st.nextToken() != StreamTokenizer.TT_EOF) {
String s;
switch(st.ttype) {
case StreamTokenizer.TT_EOL:
s = new String("EOL");
break;
/** case StreamTokenizer.TT_NUMBER:
s = Double.toString(st.nval);
break;**/
case StreamTokenizer.TT_WORD:
s = st.sval; // Already a String
// System.out.println("Token Extracted = " + st.sval);
st.lowerCaseMode(true);
numberOfTokens++;
break;
default:
s = String.valueOf((char)st.ttype);
}
if(counts.containsKey(s))
((Counter)counts.get(s)).increment();
else
counts.put(s, new Counter());
}
}
catch(IOException e)
{
System.out.println("st.nextToken() unsuccessful");
System.out.println("Problem reading " + fileName );
System.out.println("Exception: " + e);
e.printStackTrace();
}
//create instance of commonwords
CommonWords c = new CommonWords();
cword = c.cwords;
//remove the commonwords from HashMap
counts.remove(cword);
System.out.println("File "+fileName+" was succesfully loaded and Stored.");
System.out.println("Tokens in File :" + numberOfTokens);
System.out.println(cword);
}
public static void main(String[] args)
{
Train t = new Train();
}
}
CommonWords.java
:
import java.io.*;
import java.util.*;
import javax.swing.JOptionPane;
public class CommonWords
{
public Hashtable cwords;
public FileReader file;
public StreamTokenizer st;
public CommonWords() {
String fileName = "cwords.txt";
int tokenType = 0;
int numberOfTokens = 0;
try
{
Reader cWords = new BufferedReader(new FileReader(fileName));
StreamTokenizer stc = new StreamTokenizer(cWords);
int i=0;
while(stc.nextToken() != StreamTokenizer.TT_EOF)
{
cwords.put(stc.sval.toLowerCase(), new Integer(i));
System.out.println("Token Extracted = " + stc.sval);
i=i++;
}
}
catch(IOException e)
{
System.out.println("st.nextToken() unsuccessful");
System.out.println("Problem reading " + fileName );
System.out.println("Exception: " + e);
e.printStackTrace();
}
}
}
Error Message:
:
java.lang.NullPointerException
at CommonWords.<init>(CommonWords.java:26)
at Train.<init>(Train.java:80)
at Train.main(Train.java:92)
Exception in thread "main"
I think i have something wrong with the adding of the commonwords to the hashtable..
Does anyone have any ideas.
Thanks :rock:
|