Programming Forums
User Name Password Register
 

RSS Feed
FORUM INDEX | TODAY'S POSTS | UNANSWERED THREADS | ADVANCED SEARCH

Reply
 
Thread Tools Display Modes
Old Nov 26th, 2004, 1:04 PM   #1
ellomoto
Newbie
 
Join Date: Nov 2004
Posts: 4
Rep Power: 0 ellomoto is on a distinguished road
Hi all,

im trying to build a text classification program that reads in a text file, tokenises it and stores it in a hashmap. Along the way i would like to instantiate another class which reads a text file full of commonwords. I would then like to remove these common words from the initial hashmap. I have set up two classes one called CommonWords.java and the other called Train.java. I am getting an error which i have posted after my code.

Train.java
import java.io.*; 
import java.util.*;
import javax.swing.JOptionPane;

public class Train {
  
  public FileReader file;
  public StreamTokenizer st;
  public HashMap counts = new HashMap();
  public Hashtable cword;
  
/** Creates a new instance of train */
public Train() {
    
    String fileName = "SPAM.txt";     
    String data;    
    HashMap counts = new HashMap(); 
    int tokenType = 0;
    int numberOfTokens = 0;
    
    try 
    {      
    FileReader file = new FileReader(fileName);
    StreamTokenizer st = new StreamTokenizer(new BufferedReader(file));
    
    st.ordinaryChar('!');
    st.ordinaryChar('$');
    st.ordinaryChar('"');
    st.whitespaceChars('/','/');
    st.whitespaceChars('\\','\\');
    st.whitespaceChars('.','.');
    st.whitespaceChars(',',',');
    st.whitespaceChars(';',';');
    st.whitespaceChars(':',':');
    st.whitespaceChars('=','=');
    st.whitespaceChars('\'','\'');
    st.whitespaceChars('`','`');
    st.whitespaceChars('[',']');   
    
   while(st.nextToken() != StreamTokenizer.TT_EOF) {
     
    String s;
    
    switch(st.ttype) {
     case StreamTokenizer.TT_EOL:
      s = new String("EOL");      
      break;
      
    /** case StreamTokenizer.TT_NUMBER:
      s = Double.toString(st.nval);      
      break;**/
      
     case StreamTokenizer.TT_WORD:
      s = st.sval; // Already a String
      
     // System.out.println("Token Extracted = " + st.sval);
      st.lowerCaseMode(true);
      numberOfTokens++;
      break;
      
     default: 
      s = String.valueOf((char)st.ttype);
      
    }
    
    if(counts.containsKey(s))
     ((Counter)counts.get(s)).increment();
    else
     counts.put(s, new Counter());
   }
    }   
  catch(IOException e) 
  {
   System.out.println("st.nextToken() unsuccessful");
   System.out.println("Problem reading " + fileName );
   System.out.println("Exception: " + e);
   e.printStackTrace();
  }
    //create instance of commonwords
    CommonWords c = new CommonWords();    
    cword = c.cwords; 
    
    //remove the commonwords from HashMap
    counts.remove(cword);
    System.out.println("File "+fileName+" was succesfully loaded and Stored.");
    System.out.println("Tokens in File :" + numberOfTokens);
    System.out.println(cword);    
   }

public static void main(String[] args) 
 {
   Train t = new Train();  
 }  
}

CommonWords.java
import java.io.*; 
import java.util.*;
import javax.swing.JOptionPane;

public class CommonWords 
{  
  public Hashtable cwords;
  public FileReader file;
  public StreamTokenizer st;
  
public CommonWords() {     
    
    String fileName = "cwords.txt";     
    int tokenType = 0;
    int numberOfTokens = 0; 
    
   try
   {
     Reader cWords = new BufferedReader(new FileReader(fileName));      
     StreamTokenizer stc = new StreamTokenizer(cWords); 
     
     int i=0; 

     while(stc.nextToken() != StreamTokenizer.TT_EOF)
     {
       cwords.put(stc.sval.toLowerCase(), new Integer(i));       
       System.out.println("Token Extracted = " + stc.sval);        
       i=i++;
     }
   }
    catch(IOException e) 
      {
       System.out.println("st.nextToken() unsuccessful");
       System.out.println("Problem reading " + fileName );
       System.out.println("Exception: " + e);
       e.printStackTrace();
      }    
  } 
}

Error Message:
java.lang.NullPointerException
    at CommonWords.<init>(CommonWords.java:26)
    at Train.<init>(Train.java:80)
    at Train.main(Train.java:92)
Exception in thread "main"

I think i have something wrong with the adding of the commonwords to the hashtable..

Does anyone have any ideas.

Thanks :rock:
ellomoto is offline   Reply With Quote
Reply

Bookmarks

« Previous Thread in Forum | Next Thread in Forum »

Currently Active Users Viewing This Thread: 1 (0 members and 1 guests)
 
Thread Tools
Display Modes

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off
Forum Jump




DaniWeb IT Discussion Community
All times are GMT -5. The time now is 10:01 AM.

Powered by vBulletin® Version 3.7.0, Copyright ©2000 - 2008, Jelsoft Enterprises Ltd.
Copyright ©2007 DaniWeb® LLC