I'm getting a warning when I wrap a BeautifulSoup object in str() and pass it to pandas.
I'm trying to parse a table with pandas. I get the correct output, but also this warning:
"FutureWarning: Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object."
def parse_html(box_scores):
    """Load a saved box-score HTML page and strip its extra header rows.

    Args:
        box_scores: Path of the HTML file to read.

    Returns:
        The ``BeautifulSoup`` tree (built with the ``lxml`` parser) with every
        ``tr.over_header`` and ``tr.theader`` row removed.
    """
    with open(box_scores) as f:
        html = f.read()

    soup = BeautifulSoup(html, features="lxml")

    # Plain loops instead of list comprehensions: decompose() is called only
    # for its side effect, so there is no list worth building.
    # NOTE(review): presumably these rows are dropped so each table is left
    # with a single header row -- confirm against the saved pages.
    for selector in ("tr.over_header", "tr.theader"):
        for row in soup.select(selector):
            row.decompose()
    return soup
def read_line_score(soup):
    """Return the first and last columns of the ``line_score`` table.

    The table whose ``id`` attribute is ``line_score`` is parsed, its first
    column is renamed ``team`` and its last column ``total``, and only those
    two columns are returned.

    NOTE: the markup is passed to ``pd.read_html`` as a literal string. This is
    the call style for which pandas reports "Passing literal html to
    'read_html' is deprecated".

    Args:
        soup: Parsed page; it is converted to markup with ``str()``.

    Returns:
        A DataFrame with the columns ``team`` and ``total``.
    """
    markup = str(soup)
    tables = pd.read_html(markup, attrs={"id": "line_score"})
    table = tables[0]

    # Rename positionally: first column -> "team", last column -> "total".
    labels = list(table.columns)
    labels[0], labels[-1] = "team", "total"
    table.columns = labels

    return table[["team", "total"]]
def read_stats(soup, team, stat):
    """Read one team's stat table and convert every column to numbers.

    NOTE: the markup is passed to ``pd.read_html`` as a literal string. This is
    the call style for which pandas reports "Passing literal html to
    'read_html' is deprecated".

    Args:
        soup: Parsed page; it is converted to markup with ``str()``.
        team: Team identifier used in the table's ``id`` attribute.
        stat: Stat identifier used in the table's ``id`` attribute.

    Returns:
        The table with id ``box-{team}-game-{stat}``, indexed by its first
        column, with non-numeric cells replaced by NaN.
    """
    table_id = f"box-{team}-game-{stat}"
    tables = pd.read_html(str(soup), attrs={"id": table_id}, index_col=0)
    # errors="coerce" turns any cell that is not a number into NaN.
    return tables[0].apply(pd.to_numeric, errors="coerce")
Instead of:
# Deprecated call style: the HTML is handed to read_html as a literal string.
line_score = pd.read_html(str(soup), attrs = {'id': 'line_score'})[0]
Wrap the string in io.StringIO before passing it to read_html:
from io import StringIO
def parse_html(box_scores):
    """Load a saved box-score HTML page and strip its extra header rows.

    Args:
        box_scores: Path of the HTML file to read.

    Returns:
        The ``BeautifulSoup`` tree (built with the ``lxml`` parser) with every
        ``tr.over_header`` and ``tr.theader`` row removed.
    """
    with open(box_scores) as f:
        html = f.read()

    soup = BeautifulSoup(html, features="lxml")

    # Plain loops instead of list comprehensions: decompose() is called only
    # for its side effect, so there is no list worth building.
    # NOTE(review): presumably these rows are dropped so each table is left
    # with a single header row -- confirm against the saved pages.
    for selector in ("tr.over_header", "tr.theader"):
        for row in soup.select(selector):
            row.decompose()
    return soup
def read_line_score(soup):
    """Return the first and last columns of the ``line_score`` table.

    The table whose ``id`` attribute is ``line_score`` is parsed, its first
    column is renamed ``team`` and its last column ``total``, and only those
    two columns are returned.

    Args:
        soup: Parsed page; it is converted to markup with ``str()`` and
            wrapped in ``StringIO`` so pandas reads it as a file-like object.

    Returns:
        A DataFrame with the columns ``team`` and ``total``.
    """
    markup = StringIO(str(soup))
    tables = pd.read_html(markup, attrs={"id": "line_score"})
    table = tables[0]

    # Rename positionally: first column -> "team", last column -> "total".
    labels = list(table.columns)
    labels[0], labels[-1] = "team", "total"
    table.columns = labels

    return table[["team", "total"]]
def read_stats(soup, team, stat):
    """Read one team's stat table and convert every column to numbers.

    Args:
        soup: Parsed page; it is converted to markup with ``str()`` and
            wrapped in ``StringIO`` so pandas reads it as a file-like object.
        team: Team identifier used in the table's ``id`` attribute.
        stat: Stat identifier used in the table's ``id`` attribute.

    Returns:
        The table with id ``box-{team}-game-{stat}``, indexed by its first
        column, with non-numeric cells replaced by NaN.
    """
    table_id = f"box-{team}-game-{stat}"
    markup = StringIO(str(soup))
    tables = pd.read_html(markup, attrs={"id": table_id}, index_col=0)
    # errors="coerce" turns any cell that is not a number into NaN.
    return tables[0].apply(pd.to_numeric, errors="coerce")
If you like our work, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate to us with