lastly, I'm trying to test my get_company_urls to see if I'm ready for get_company_data, and I'm encountering some errors while trying to run the module, if anyone could help me troubleshoot I'd really appreciate it because I've been staring at this code for awhile and can't figure out what I did wrong.
from urllib2 import urlopen
from BeautifulSoup import BeautifulSoup
from time import sleep
industry_page = "http://biz.yahoo.com/ic/ind_index.html"
def get_industry_urls(industry_page):
soup = BeautifulSoup(urlopen(industry_page))
links = soup.fetch("table")[7].fetch("a")
return [a['href'] for a in links if a.string != "Alphabetical"]
def get_company_index(industry_url):
soup = BeautifulSoup(urlopen(industry_url))
index_link = soup.fetch("table")[11].fetch("a")[2]
return index_link['href']
def get_company_urls(company_index):
soup = BeautifulSoup(urlopen(company_index))
urls = soup.fetch("table")[1].fetch("a")
return[a['href'] for a in urls]
for industry_url in get_industry_urls(industry_page):
company_index = get_company_index(industry_url)
for company_index in get_company_index(industry_url):
print get_company_urls(company_index) thanks