I would like to scrape data from the site http://www.x-rates.com/table/?from=USD&amount=1 (it is a currency exchange site).
I want to get the word "Euro" from the table, but I get an empty list. Here is my code:
from bs4 import BeautifulSoup
import requests
# Download the rates page and parse it with Python's built-in HTML parser.
res = requests.get('http://www.x-rates.com/table/?from=USD&amount=1')
# Call the imported class directly: only `BeautifulSoup` is imported, not the `bs4` module.
soup = BeautifulSoup(res.text, 'html.parser')
# The selector being asked about; it yields an empty list.
hehe = soup.select('table.ratesTable:nth-child(4) > tbody:nth-child(2) > tr:nth-child(1) > td:nth-child(1)')
print(hehe)
I also tried this:
# Second attempt: chains of adjacent-sibling (+) combinators; this also returned nothing.
hehe = soup.select('table.ratesTable + table.ratesTable + table.ratesTable + table.ratesTable table.ratesTable + tbody + tbody + tbody + tr + tr + td + td')
but I still get nothing. What should I change?
If you want to use select, you can use nth-of-type,
which is supported in bs4, to pull the first td in the table, which is where the first Euro
appears:
# Parse the downloaded page with the built-in HTML parser.
soup = BeautifulSoup(res.text, 'html.parser')
# Query for a first-of-type <td> beneath any element with class "ratesTable".
first_cell = soup.select(".ratesTable td:nth-of-type(1)")
print(first_cell)
Output:
[<td>Euro</td>]
If you want to be more specific, you can qualify the class with the tag name (table.ratesTable):
# Same query, with the class qualified by the <table> tag name.
matches = soup.select("table.ratesTable td:nth-of-type(1)")
print(matches)
And to get the second Euro:
# First <td> of the 16th <tr> in the table carrying both classes.
sixteenth_row_cell = soup.select(".tablesorter.ratesTable tr:nth-of-type(16) td:nth-of-type(1)")
print(sixteenth_row_cell)
Output:
[<td>Euro</td>]
Or using find:
# Parse the response, then take the first <table> whose class is "ratesTable".
soup = BeautifulSoup(res.text, 'html.parser')
table = soup.find("table", attrs={"class": "ratesTable"})
# Print the text of the first <td> inside that table, then the table itself.
first_cell = table.find("td")
print(first_cell.text)
print(table)
Output:
Euro
If you run soup.select("table.ratesTable:nth-child(4)"),
you will see that it returns nothing, so your CSS selector is wrong.
To get all the data:
from pprint import pprint as pp
# two tables
tables = soup.select(".ratesTable")
# Maps the first cell of each row to a {column header: cell text} dict.
table_data = {}
# Column headers are read once, from the first table's <th> cells.
cols = [th.text for th in tables[0].find_all("th")]
for table in tables:
    for tr in table.find_all("tr"):
        data = [td.text for td in tr.find_all("td")]
        # Rows without any <td> cells produce an empty list and are skipped.
        if data:
            # A row whose first cell was already seen overwrites the earlier entry.
            table_data[data[0]] = dict(zip(cols, data))
pp(table_data)
Output:
{u'Argentine Peso': {u'1.00 USD': u'15.358344',
u'US Dollar': u'Argentine Peso',
u'inv. 1.00 USD': u'0.065111'},
u'Australian Dollar': {u'1.00 USD': u'1.388393',
u'US Dollar': u'Australian Dollar',
u'inv. 1.00 USD': u'0.720257'},
u'Bahraini Dinar': {u'1.00 USD': u'0.376989',
u'US Dollar': u'Bahraini Dinar',
u'inv. 1.00 USD': u'2.652595'},
u'Botswana Pula': {u'1.00 USD': u'11.219075',
u'US Dollar': u'Botswana Pula',
u'inv. 1.00 USD': u'0.089134'},
u'Brazilian Real': {u'1.00 USD': u'3.927908',
u'US Dollar': u'Brazilian Real',
u'inv. 1.00 USD': u'0.254588'},
u'British Pound': {u'1.00 USD': u'0.716854',
u'US Dollar': u'British Pound',
u'inv. 1.00 USD': u'1.394983'},
u'Bruneian Dollar': {u'1.00 USD': u'1.403737',
u'US Dollar': u'Bruneian Dollar',
u'inv. 1.00 USD': u'0.712384'},
u'Bulgarian Lev': {u'1.00 USD': u'1.771194',
u'US Dollar': u'Bulgarian Lev',
u'inv. 1.00 USD': u'0.564591'},
u'Canadian Dollar': {u'1.00 USD': u'1.362152',
u'US Dollar': u'Canadian Dollar',
u'inv. 1.00 USD': u'0.734132'},
u'Chilean Peso': {u'1.00 USD': u'689.453282',
u'US Dollar': u'Chilean Peso',
u'inv. 1.00 USD': u'0.001450'},
u'Chinese Yuan Renminbi': {u'1.00 USD': u'6.532590',
u'US Dollar': u'Chinese Yuan Renminbi',
u'inv. 1.00 USD': u'0.153079'},
u'Colombian Peso': {u'1.00 USD': u'3321.597022',
u'US Dollar': u'Colombian Peso',
u'inv. 1.00 USD': u'0.000301'},
u'Croatian Kuna': {u'1.00 USD': u'6.926768',
u'US Dollar': u'Croatian Kuna',
u'inv. 1.00 USD': u'0.144367'},
u'Czech Koruna': {u'1.00 USD': u'24.605774',
u'US Dollar': u'Czech Koruna',
u'inv. 1.00 USD': u'0.040641'},
u'Danish Krone': {u'1.00 USD': u'6.783374',
u'US Dollar': u'Danish Krone',
u'inv. 1.00 USD': u'0.147419'},
u'Emirati Dirham': {u'1.00 USD': u'3.672956',
u'US Dollar': u'Emirati Dirham',
u'inv. 1.00 USD': u'0.272260'},
u'Euro': {u'1.00 USD': u'0.909064',
u'US Dollar': u'Euro',
u'inv. 1.00 USD': u'1.100033'},
u'Hong Kong Dollar': {u'1.00 USD': u'7.770873',
u'US Dollar': u'Hong Kong Dollar',
u'inv. 1.00 USD': u'0.128686'},
u'Hungarian Forint': {u'1.00 USD': u'282.628733',
u'US Dollar': u'Hungarian Forint',
u'inv. 1.00 USD': u'0.003538'},
u'Icelandic Krona': {u'1.00 USD': u'129.157149',
u'US Dollar': u'Icelandic Krona',
u'inv. 1.00 USD': u'0.007743'},
u'Indian Rupee': {u'1.00 USD': u'68.885961',
u'US Dollar': u'Indian Rupee',
u'inv. 1.00 USD': u'0.014517'},
u'Indonesian Rupiah': {u'1.00 USD': u'13420.180741',
u'US Dollar': u'Indonesian Rupiah',
u'inv. 1.00 USD': u'0.000075'},
u'Iranian Rial': {u'1.00 USD': u'30193.236727',
u'US Dollar': u'Iranian Rial',
u'inv. 1.00 USD': u'0.000033'},
u'Israeli Shekel': {u'1.00 USD': u'3.907342',
u'US Dollar': u'Israeli Shekel',
u'inv. 1.00 USD': u'0.255928'},
u'Japanese Yen': {u'1.00 USD': u'112.854369',
u'US Dollar': u'Japanese Yen',
u'inv. 1.00 USD': u'0.008861'},
u'Kazakhstani Tenge': {u'1.00 USD': u'349.948907',
u'US Dollar': u'Kazakhstani Tenge',
u'inv. 1.00 USD': u'0.002858'},
u'Kuwaiti Dinar': {u'1.00 USD': u'0.300490',
u'US Dollar': u'Kuwaiti Dinar',
u'inv. 1.00 USD': u'3.327899'},
u'Latvian Lat': {u'1.00 USD': u'0.638890',
u'US Dollar': u'Latvian Lat',
u'inv. 1.00 USD': u'1.565215'},
u'Libyan Dinar': {u'1.00 USD': u'1.389216',
u'US Dollar': u'Libyan Dinar',
u'inv. 1.00 USD': u'0.719831'},
u'Lithuanian Litas': {u'1.00 USD': u'3.138815',
u'US Dollar': u'Lithuanian Litas',
u'inv. 1.00 USD': u'0.318592'},
u'Malaysian Ringgit': {u'1.00 USD': u'4.215841',
u'US Dollar': u'Malaysian Ringgit',
u'inv. 1.00 USD': u'0.237201'},
u'Mauritian Rupee': {u'1.00 USD': u'35.959724',
u'US Dollar': u'Mauritian Rupee',
u'inv. 1.00 USD': u'0.027809'},
u'Mexican Peso': {u'1.00 USD': u'18.099833',
u'US Dollar': u'Mexican Peso',
u'inv. 1.00 USD': u'0.055249'},
u'Nepalese Rupee': {u'1.00 USD': u'109.959953',
u'US Dollar': u'Nepalese Rupee',
u'inv. 1.00 USD': u'0.009094'},
u'New Zealand Dollar': {u'1.00 USD': u'1.495957',
u'US Dollar': u'New Zealand Dollar',
u'inv. 1.00 USD': u'0.668468'},
u'Norwegian Krone': {u'1.00 USD': u'8.661961',
u'US Dollar': u'Norwegian Krone',
u'inv. 1.00 USD': u'0.115447'},
u'Omani Rial': {u'1.00 USD': u'0.385000',
u'US Dollar': u'Omani Rial',
u'inv. 1.00 USD': u'2.597403'},
u'Pakistani Rupee': {u'1.00 USD': u'104.604918',
u'US Dollar': u'Pakistani Rupee',
u'inv. 1.00 USD': u'0.009560'},
u'Philippine Peso': {u'1.00 USD': u'47.606650',
u'US Dollar': u'Philippine Peso',
u'inv. 1.00 USD': u'0.021005'},
u'Polish Zloty': {u'1.00 USD': u'3.960685',
u'US Dollar': u'Polish Zloty',
u'inv. 1.00 USD': u'0.252482'},
u'Qatari Riyal': {u'1.00 USD': u'3.641295',
u'US Dollar': u'Qatari Riyal',
u'inv. 1.00 USD': u'0.274628'},
u'Romanian New Leu': {u'1.00 USD': u'4.060863',
u'US Dollar': u'Romanian New Leu',
u'inv. 1.00 USD': u'0.246253'},
u'Russian Ruble': {u'1.00 USD': u'75.913328',
u'US Dollar': u'Russian Ruble',
u'inv. 1.00 USD': u'0.013173'},
u'Saudi Arabian Riyal': {u'1.00 USD': u'3.750501',
u'US Dollar': u'Saudi Arabian Riyal',
u'inv. 1.00 USD': u'0.266631'},
u'Singapore Dollar': {u'1.00 USD': u'1.403737',
u'US Dollar': u'Singapore Dollar',
u'inv. 1.00 USD': u'0.712384'},
u'South African Rand': {u'1.00 USD': u'15.547001',
u'US Dollar': u'South African Rand',
u'inv. 1.00 USD': u'0.064321'},
u'South Korean Won': {u'1.00 USD': u'1238.257908',
u'US Dollar': u'South Korean Won',
u'inv. 1.00 USD': u'0.000808'},
u'Sri Lankan Rupee': {u'1.00 USD': u'144.195067',
u'US Dollar': u'Sri Lankan Rupee',
u'inv. 1.00 USD': u'0.006935'},
u'Swedish Krona': {u'1.00 USD': u'8.530904',
u'US Dollar': u'Swedish Krona',
u'inv. 1.00 USD': u'0.117221'},
u'Swiss Franc': {u'1.00 USD': u'0.994570',
u'US Dollar': u'Swiss Franc',
u'inv. 1.00 USD': u'1.005460'},
u'Taiwan New Dollar': {u'1.00 USD': u'33.188318',
u'US Dollar': u'Taiwan New Dollar',
u'inv. 1.00 USD': u'0.030131'},
u'Thai Baht': {u'1.00 USD': u'35.687352',
u'US Dollar': u'Thai Baht',
u'inv. 1.00 USD': u'0.028021'},
u'Trinidadian Dollar': {u'1.00 USD': u'6.515309',
u'US Dollar': u'Trinidadian Dollar',
u'inv. 1.00 USD': u'0.153485'},
u'Turkish Lira': {u'1.00 USD': u'2.922907',
u'US Dollar': u'Turkish Lira',
u'inv. 1.00 USD': u'0.342125'},
u'Venezuelan Bolivar': {u'1.00 USD': u'6.320083',
u'US Dollar': u'Venezuelan Bolivar',
u'inv. 1.00 USD': u'0.158226'}}
You can structure the dict however you prefer but the logic will still be the same.
If you just want the tablesorter table:
# one specific table
table = soup.select(".tablesorter.ratesTable")
# Maps the first cell of each row to a {column header: cell text} dict.
table_data = {}
# select() returns a list, so the first match is taken with [0].
cols = [th.text for th in table[0].find_all("th")]
for tr in table[0].find_all("tr"):
    data = [td.text for td in tr.find_all("td")]
    # Rows without any <td> cells produce an empty list and are skipped.
    if data:
        table_data[data[0]] = dict(zip(cols, data))
print(table_data)
Output:
{u'Argentine Peso': {u'1.00 USD': u'15.324285',
u'US Dollar': u'Argentine Peso',
u'inv. 1.00 USD': u'0.065256'},
u'Australian Dollar': {u'1.00 USD': u'1.388630',
u'US Dollar': u'Australian Dollar',
u'inv. 1.00 USD': u'0.720134'},
u'Bahraini Dinar': {u'1.00 USD': u'0.376989',
u'US Dollar': u'Bahraini Dinar',
u'inv. 1.00 USD': u'2.652595'},
u'Botswana Pula': {u'1.00 USD': u'11.219075',
u'US Dollar': u'Botswana Pula',
u'inv. 1.00 USD': u'0.089134'},
u'Brazilian Real': {u'1.00 USD': u'3.936188',
u'US Dollar': u'Brazilian Real',
u'inv. 1.00 USD': u'0.254053'},
u'British Pound': {u'1.00 USD': u'0.717464',
u'US Dollar': u'British Pound',
u'inv. 1.00 USD': u'1.393799'},
u'Bruneian Dollar': {u'1.00 USD': u'1.403808',
u'US Dollar': u'Bruneian Dollar',
u'inv. 1.00 USD': u'0.712348'},
u'Bulgarian Lev': {u'1.00 USD': u'1.775921',
u'US Dollar': u'Bulgarian Lev',
u'inv. 1.00 USD': u'0.563088'},
u'Canadian Dollar': {u'1.00 USD': u'1.362506',
u'US Dollar': u'Canadian Dollar',
u'inv. 1.00 USD': u'0.733942'},
u'Chilean Peso': {u'1.00 USD': u'691.510617',
u'US Dollar': u'Chilean Peso',
u'inv. 1.00 USD': u'0.001446'},
u'Chinese Yuan Renminbi': {u'1.00 USD': u'6.533541',
u'US Dollar': u'Chinese Yuan Renminbi',
u'inv. 1.00 USD': u'0.153056'},
u'Colombian Peso': {u'1.00 USD': u'3313.262601',
u'US Dollar': u'Colombian Peso',
u'inv. 1.00 USD': u'0.000302'},
u'Croatian Kuna': {u'1.00 USD': u'6.920610',
u'US Dollar': u'Croatian Kuna',
u'inv. 1.00 USD': u'0.144496'},
u'Czech Koruna': {u'1.00 USD': u'24.583134',
u'US Dollar': u'Czech Koruna',
u'inv. 1.00 USD': u'0.040678'},
u'Danish Krone': {u'1.00 USD': u'6.776307',
u'US Dollar': u'Danish Krone',
u'inv. 1.00 USD': u'0.147573'},
u'Emirati Dirham': {u'1.00 USD': u'3.673148',
u'US Dollar': u'Emirati Dirham',
u'inv. 1.00 USD': u'0.272246'},
u'Euro': {u'1.00 USD': u'0.908120',
u'US Dollar': u'Euro',
u'inv. 1.00 USD': u'1.101176'},
u'Hong Kong Dollar': {u'1.00 USD': u'7.771176',
u'US Dollar': u'Hong Kong Dollar',
u'inv. 1.00 USD': u'0.128681'},
u'Hungarian Forint': {u'1.00 USD': u'282.305073',
u'US Dollar': u'Hungarian Forint',
u'inv. 1.00 USD': u'0.003542'},
u'Icelandic Krona': {u'1.00 USD': u'129.154766',
u'US Dollar': u'Icelandic Krona',
u'inv. 1.00 USD': u'0.007743'},
u'Indian Rupee': {u'1.00 USD': u'68.865641',
u'US Dollar': u'Indian Rupee',
u'inv. 1.00 USD': u'0.014521'},
u'Indonesian Rupiah': {u'1.00 USD': u'13422.938587',
u'US Dollar': u'Indonesian Rupiah',
u'inv. 1.00 USD': u'0.000074'},
u'Iranian Rial': {u'1.00 USD': u'30193.236717',
u'US Dollar': u'Iranian Rial',
u'inv. 1.00 USD': u'0.000033'},
u'Israeli Shekel': {u'1.00 USD': u'3.903987',
u'US Dollar': u'Israeli Shekel',
u'inv. 1.00 USD': u'0.256148'},
u'Japanese Yen': {u'1.00 USD': u'112.709992',
u'US Dollar': u'Japanese Yen',
u'inv. 1.00 USD': u'0.008872'},
u'Kazakhstani Tenge': {u'1.00 USD': u'349.948907',
u'US Dollar': u'Kazakhstani Tenge',
u'inv. 1.00 USD': u'0.002858'},
u'Kuwaiti Dinar': {u'1.00 USD': u'0.300490',
u'US Dollar': u'Kuwaiti Dinar',
u'inv. 1.00 USD': u'3.327899'},
u'Latvian Lat': {u'1.00 USD': u'0.638227',
u'US Dollar': u'Latvian Lat',
u'inv. 1.00 USD': u'1.566841'},
u'Libyan Dinar': {u'1.00 USD': u'1.389216',
u'US Dollar': u'Libyan Dinar',
u'inv. 1.00 USD': u'0.719831'},
u'Lithuanian Litas': {u'1.00 USD': u'3.135556',
u'US Dollar': u'Lithuanian Litas',
u'inv. 1.00 USD': u'0.318923'},
u'Malaysian Ringgit': {u'1.00 USD': u'4.217441',
u'US Dollar': u'Malaysian Ringgit',
u'inv. 1.00 USD': u'0.237111'},
u'Mauritian Rupee': {u'1.00 USD': u'35.959724',
u'US Dollar': u'Mauritian Rupee',
u'inv. 1.00 USD': u'0.027809'},
u'Mexican Peso': {u'1.00 USD': u'18.131872',
u'US Dollar': u'Mexican Peso',
u'inv. 1.00 USD': u'0.055152'},
u'Nepalese Rupee': {u'1.00 USD': u'109.959303',
u'US Dollar': u'Nepalese Rupee',
u'inv. 1.00 USD': u'0.009094'},
u'New Zealand Dollar': {u'1.00 USD': u'1.494449',
u'US Dollar': u'New Zealand Dollar',
u'inv. 1.00 USD': u'0.669143'},
u'Norwegian Krone': {u'1.00 USD': u'8.655515',
u'US Dollar': u'Norwegian Krone',
u'inv. 1.00 USD': u'0.115533'},
u'Omani Rial': {u'1.00 USD': u'0.385000',
u'US Dollar': u'Omani Rial',
u'inv. 1.00 USD': u'2.597403'},
u'Pakistani Rupee': {u'1.00 USD': u'104.604918',
u'US Dollar': u'Pakistani Rupee',
u'inv. 1.00 USD': u'0.009560'},
u'Philippine Peso': {u'1.00 USD': u'47.623330',
u'US Dollar': u'Philippine Peso',
u'inv. 1.00 USD': u'0.020998'},
u'Polish Zloty': {u'1.00 USD': u'3.957191',
u'US Dollar': u'Polish Zloty',
u'inv. 1.00 USD': u'0.252704'},
u'Qatari Riyal': {u'1.00 USD': u'3.640748',
u'US Dollar': u'Qatari Riyal',
u'inv. 1.00 USD': u'0.274669'},
u'Romanian New Leu': {u'1.00 USD': u'4.056672',
u'US Dollar': u'Romanian New Leu',
u'inv. 1.00 USD': u'0.246507'},
u'Russian Ruble': {u'1.00 USD': u'76.158926',
u'US Dollar': u'Russian Ruble',
u'inv. 1.00 USD': u'0.013130'},
u'Saudi Arabian Riyal': {u'1.00 USD': u'3.749980',
u'US Dollar': u'Saudi Arabian Riyal',
u'inv. 1.00 USD': u'0.266668'},
u'Singapore Dollar': {u'1.00 USD': u'1.403808',
u'US Dollar': u'Singapore Dollar',
u'inv. 1.00 USD': u'0.712348'},
u'South African Rand': {u'1.00 USD': u'15.576569',
u'US Dollar': u'South African Rand',
u'inv. 1.00 USD': u'0.064199'},
u'South Korean Won': {u'1.00 USD': u'1239.577296',
u'US Dollar': u'South Korean Won',
u'inv. 1.00 USD': u'0.000807'},
u'Sri Lankan Rupee': {u'1.00 USD': u'144.195899',
u'US Dollar': u'Sri Lankan Rupee',
u'inv. 1.00 USD': u'0.006935'},
u'Swedish Krona': {u'1.00 USD': u'8.526837',
u'US Dollar': u'Swedish Krona',
u'inv. 1.00 USD': u'0.117277'},
u'Swiss Franc': {u'1.00 USD': u'0.992590',
u'US Dollar': u'Swiss Franc',
u'inv. 1.00 USD': u'1.007465'},
u'Taiwan New Dollar': {u'1.00 USD': u'33.191630',
u'US Dollar': u'Taiwan New Dollar',
u'inv. 1.00 USD': u'0.030128'},
u'Thai Baht': {u'1.00 USD': u'35.677099',
u'US Dollar': u'Thai Baht',
u'inv. 1.00 USD': u'0.028029'},
u'Trinidadian Dollar': {u'1.00 USD': u'6.515314',
u'US Dollar': u'Trinidadian Dollar',
u'inv. 1.00 USD': u'0.153485'},
u'Turkish Lira': {u'1.00 USD': u'2.923851',
u'US Dollar': u'Turkish Lira',
u'inv. 1.00 USD': u'0.342015'},
u'Venezuelan Bolivar': {u'1.00 USD': u'6.349609',
u'US Dollar': u'Venezuelan Bolivar',
u'inv. 1.00 USD': u'0.157490'}}
It may help to open your browser's developer tools and look at the styles; they give a few hints on how to select a particular element.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With