View Single Post
Old May 26th, 2006, 12:07 PM   #205
Arevos
Programming Guru
 
Arevos's Avatar
 
Join Date: Aug 2005
Location: England
Posts: 1,499
Rep Power: 5 Arevos is on a distinguished road
There are a number of things wrong with the script you posted above. I've fixed the problems I could see, and highlighted the changes in red.
import re
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup

def _parent_table_or_na(soup, heading_text):
    """Return the <table> enclosing the first text node matching *heading_text*.

    Falls back to the string "N/A" when no text node matches, or when the
    matching text is not nested inside a table.
    """
    heading = soup.firstText(re.compile(heading_text))
    # The lookup must be checked *before* calling findParent on it: a missing
    # section yields None, and None has no findParent method.
    if heading is None:
        return "N/A"
    table = heading.findParent("table")
    # Explicit None test so the result does not depend on the truthiness of
    # a parse-tree object.
    if table is None:
        return "N/A"
    return table


def get_company_data(company_url):
    """Fetch a company page and return its main sections as one HTML table.

    The page at *company_url* is downloaded and parsed, and five pieces are
    extracted: the first text matching "Company Profile" (used as the company
    name), the company profile table, the contact information table, the
    financial highlights table and the key people table.  Any piece that
    cannot be located is replaced by the text "N/A".

    Returns a string of the form
    "<table><tr>\\n<td>...</td> x5 </tr></table>".

    Errors raised by urlopen (for example when the URL cannot be reached)
    are not caught here.
    """
    page = urlopen(company_url)
    try:
        soup = BeautifulSoup(page)
    finally:
        # Release the connection even if parsing fails.
        page.close()

    # Company Name
    name = soup.firstText(re.compile("Company Profile"))
    if name is None:
        name = "N/A"

    # Company Profile - Table
    # The table of interest follows the third "Company Profile" text node;
    # guard the index so a page with fewer matches yields "N/A" instead of
    # an IndexError.
    profile_matches = soup.fetchText(re.compile("Company Profile"))
    companyprofile = None
    if len(profile_matches) > 2:
        companyprofile = profile_matches[2].findNext("table")
    if companyprofile is None:
        companyprofile = "N/A"

    # Contact Information - Table
    contacttable = _parent_table_or_na(soup, "Contact Information")

    # Financial Highlights - Table
    fhighlights = _parent_table_or_na(soup, "Highlights")

    # Key People
    keypeople = _parent_table_or_na(soup, "Key People")

    # Parsed tables are objects, not strings, so they cannot be joined to a
    # string with "+".  Formatting each cell with %s renders it as text.
    cells = (name, companyprofile, contacttable, fhighlights, keypeople)
    parts = [u"<table>", u"<tr>\n"]
    parts.extend(u"<td>%s</td>" % (cell,) for cell in cells)
    parts.append(u"</tr>")
    parts.append(u"</table>")
    return u"".join(parts)
    
# Example run: fetch one company page and print the resulting HTML table.
print(get_company_data("http://biz.yahoo.com/ic/135/135359.html"))
Make sure you understand why each change was made. You seem to be having a bit of trouble completely understanding functions and scope.
Arevos is offline   Reply With Quote