View Single Post
Old May 26th, 2006, 11:40 AM   #204
zem52887
Hobbyist Programmer
 
Join Date: May 2006
Posts: 127
Rep Power: 3 zem52887 is on a distinguished road
Thanks... for the record, this is the script I attempted to test:
import re
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup
html = urlopen("http://biz.yahoo.com/ic/135/135359.html")
soup = BeautifulSoup(html)

def get_company_data("http://biz.yahoo.com/ic/135/135359.html"):
        
        #Company Name
        name = soup.firstText(re.compile("Company Profile"))
        
        
        #Company Profile - Table
        profile = soup.fetchText(re.compile("Company Profile"))[2]
        companyprofile = profile.findNext("table")
         
            
        #Contact Information - Table
        contact = soup.firstText(re.compile("Contact Information"))
        contacttable = contact.findParent("table")
          
            
        #Financial Highlights - Table
        highlights = soup.firstText(re.compile("Highlights"))
        fhighlights = highlights.findParent("table")
        
        z = len(highlights)
        if z == 0:
            "N/A"

        else:
            fhighlights
                      
        #Key People
        key = soup.firstText(re.compile("Key People"))
        keypeople = key.findParent("table")
       
       
        output = "<table>"
        output += "<tr>\n"
        output += "<td>" + name + "</td>"
        output += "<td>" + companyprofile + "</td>"
        output += "<td>" + contacttable + "</td>"
        output += "<td>" + z + "</td>"
        output += "<td>" + keypeople + "</td>"
        output += "</tr>"
        output += "</table>"
        return output
    
print get_company_data("http://biz.yahoo.com/ic/135/135359.html")
zem52887 is offline   Reply With Quote