I am trying to write data from a website. The data is listed as a table in HTML, with the tag '<tr>' marking where a new row of data begins in the rankings and '<td>' marking each descriptive item about the element in the ranking. The list is a ranking of the top 500 computers, shown 100 per page, with each entry (1, 2, 3, 4, etc.) contained in a '<tr>' row and each characteristic of the computer (its storage, max power, etc.) in a '<td>' cell.
Here is my code:
# Read the ranking page; each <tr> is one computer, each <td> one attribute.
url = requests.get("https://www.top500.org/list/2018/06/")
url.status_code
url.content
# parse the response body using Beautiful Soup
soup = BeautifulSoup(url.content, 'html.parser')
filename = "computerRank10.csv"
# 'with' guarantees the file is flushed and closed even if an error occurs.
with open(filename, "w") as f:
    headers = "Rank, Site, System, Cores, RMax, RPeak, Power\n"
    f.write(headers)
    for record in soup.findAll('tr'):
        # start building the record with an empty string
        tbltxt = ""
        # BUG FIX: the original referenced an undefined name `data`, so no
        # rows were ever written; each cell of the row must be visited with
        # an inner loop over the row's <td> elements.
        for data in record.findAll('td'):
            tbltxt = tbltxt + data.text + ";"
        tbltxt = tbltxt.replace('\n', ' ')
        tbltxt = tbltxt.replace(',', '')
        # f.write(tbltxt[0:-1] + '\n')  # variant: drop the trailing ';'
        f.write(tbltxt + '\n')
I'm getting nothing, and my CSV file is always blank.
You should use the csv module from the Python standard library.
Here is a simpler solution:
import requests
import csv
from bs4 import BeautifulSoup as bs

url = requests.get("https://www.top500.org/list/2018/06")
soup = bs(url.content, 'html.parser')
filename = "computerRank10.csv"
# FIX: the original did open(filename, 'w') inline and never closed the
# handle, so buffered rows could be lost.  'with' closes it deterministically,
# and newline='' stops the csv module emitting blank lines on Windows.
with open(filename, 'w', newline='') as out_file:
    csv_writer = csv.writer(out_file)
    for tr in soup.find_all("tr"):
        data = []
        # Header cells (<th>) occur only in the table's first row.
        for th in tr.find_all("th"):
            data.append(th.text)
        if data:
            print("Inserting headers : {}".format(','.join(data)))
            csv_writer.writerow(data)
            continue
        # Data cells: prefer the anchor text when the cell wraps a link.
        for td in tr.find_all("td"):
            if td.a:
                data.append(td.a.text.strip())
            else:
                data.append(td.text.strip())
        if data:
            print("Inserting data: {}".format(','.join(data)))
            csv_writer.writerow(data)
Try the script below. It should fetch all the data across the pages and write it to a csv file:
import csv
import requests
from bs4 import BeautifulSoup
link = "https://www.top500.org/list/2018/06/?page={}"
def get_data(link, writer=None):
    """Fetch pages 1-5 of the ranking and write every table row via *writer*.

    The original body read a module-level ``writer`` that only exists when
    the script runs under the ``__main__`` guard, so importing the module and
    calling this function raised NameError.  ``writer`` is now an optional
    keyword parameter (backward-compatible: existing ``get_data(link)`` calls
    still work) that falls back to the module-level object when omitted.
    """
    if writer is None:
        # Backward-compatible fallback to the global created in __main__.
        writer = globals()['writer']
    for url in [link.format(page) for page in range(1, 6)]:
        res = requests.get(url)
        soup = BeautifulSoup(res.text, "lxml")
        for items in soup.select("table.table tr"):
            # Header rows use <th>, data rows <td>; collect text from both.
            td = [item.get_text(strip=True) for item in items.select("th,td")]
            writer.writerow(td)
if __name__ == '__main__':
    # newline="" is required by the csv module to avoid blank rows on
    # Windows; encoding="utf-8" applies the fix the original comment
    # suggested for locales whose default encoding cannot represent the
    # scraped text.  Renamed the handle: it is an output file, not input.
    with open("tabularitem.csv", "w", newline="", encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        get_data(link)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With