Programming Forums

Programming Forums (http://www.programmingforums.org/forumindex.php)
-   Java (http://www.programmingforums.org/forum17.html)
-   -   Streamtokeniser Trouble (http://www.programmingforums.org/showthread.php?t=1301)

ellomoto Nov 26th, 2004 2:04 PM

Hi all,

im trying to build a text classification program that reads in a text file, tokenises it and stores it in a hashmap. Along the way i would like to instantiate another class which reads a text file full of commonwords. I would then like to remove these common words from the initial hashmap. I have set up two classes one called CommonWords.java and the other called Train.java. I am getting an error which i have posted after my code.

Train.java
:

import java.io.*;
import java.util.*;
import javax.swing.JOptionPane;

public class Train {
 
  public FileReader file;
  public StreamTokenizer st;
  public HashMap counts = new HashMap();
  public Hashtable cword;
 
/** Creates a new instance of train */
public Train() {
   
    String fileName = "SPAM.txt";   
    String data;   
    HashMap counts = new HashMap();
    int tokenType = 0;
    int numberOfTokens = 0;
   
    try
    {     
    FileReader file = new FileReader(fileName);
    StreamTokenizer st = new StreamTokenizer(new BufferedReader(file));
   
    st.ordinaryChar('!');
    st.ordinaryChar('$');
    st.ordinaryChar('"');
    st.whitespaceChars('/','/');
    st.whitespaceChars('\\','\\');
    st.whitespaceChars('.','.');
    st.whitespaceChars(',',',');
    st.whitespaceChars(';',';');
    st.whitespaceChars(':',':');
    st.whitespaceChars('=','=');
    st.whitespaceChars('\'','\'');
    st.whitespaceChars('`','`');
    st.whitespaceChars('[',']'); 
   
  while(st.nextToken() != StreamTokenizer.TT_EOF) {
   
    String s;
   
    switch(st.ttype) {
    case StreamTokenizer.TT_EOL:
      s = new String("EOL");     
      break;
     
    /** case StreamTokenizer.TT_NUMBER:
      s = Double.toString(st.nval);     
      break;**/
     
    case StreamTokenizer.TT_WORD:
      s = st.sval; // Already a String
     
    // System.out.println("Token Extracted = " + st.sval);
      st.lowerCaseMode(true);
      numberOfTokens++;
      break;
     
    default:
      s = String.valueOf((char)st.ttype);
     
    }
   
    if(counts.containsKey(s))
    ((Counter)counts.get(s)).increment();
    else
    counts.put(s, new Counter());
  }
    } 
  catch(IOException e)
  {
  System.out.println("st.nextToken() unsuccessful");
  System.out.println("Problem reading " + fileName );
  System.out.println("Exception: " + e);
  e.printStackTrace();
  }
    //create instance of commonwords
    CommonWords c = new CommonWords();   
    cword = c.cwords;
   
    //remove the commonwords from HashMap
    counts.remove(cword);
    System.out.println("File "+fileName+" was succesfully loaded and Stored.");
    System.out.println("Tokens in File :" + numberOfTokens);
    System.out.println(cword);   
  }

public static void main(String[] args)
 {
  Train t = new Train(); 
 } 
}


CommonWords.java
:

import java.io.*;
import java.util.*;
import javax.swing.JOptionPane;

public class CommonWords

  public Hashtable cwords;
  public FileReader file;
  public StreamTokenizer st;
 
public CommonWords() {   
   
    String fileName = "cwords.txt";   
    int tokenType = 0;
    int numberOfTokens = 0;
   
  try
  {
    Reader cWords = new BufferedReader(new FileReader(fileName));     
    StreamTokenizer stc = new StreamTokenizer(cWords);
   
    int i=0;

    while(stc.nextToken() != StreamTokenizer.TT_EOF)
    {
      cwords.put(stc.sval.toLowerCase(), new Integer(i));     
      System.out.println("Token Extracted = " + stc.sval);       
      i=i++;
    }
  }
    catch(IOException e)
      {
      System.out.println("st.nextToken() unsuccessful");
      System.out.println("Problem reading " + fileName );
      System.out.println("Exception: " + e);
      e.printStackTrace();
      }   
  }
}


Error Message:
:

java.lang.NullPointerException
    at CommonWords.<init>(CommonWords.java:26)
    at Train.<init>(Train.java:80)
    at Train.main(Train.java:92)
Exception in thread "main"


I think i have something wrong with the adding of the commonwords to the hashtable..

Does anyone have any ideas.

Thanks :rock:


All times are GMT -5. The time now is 2:26 AM.

Powered by vBulletin® Version 3.7.0, Copyright ©2000 - 2008, Jelsoft Enterprises Ltd.
Copyright ©2007 DaniWeb® LLC