Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

FutureWarning: Passing literal html to 'read_html' is deprecated and will be removed in a future version

I'm getting this warning when I wrap a soup element in str() and pass it to pandas' read_html.

I'm trying to parse a table with pandas. I'm getting the correct output but also this warning:

"FutureWarning: Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object."

def parse_html(box_scores):
    """Read the HTML file at ``box_scores`` and return it as a BeautifulSoup tree.

    Rows matching the CSS selectors ``tr.over_header`` and ``tr.theader`` are
    removed from the tree before it is returned.
    """
    with open(box_scores) as f: 
        html = f.read()
    
    soup = BeautifulSoup(html, features="lxml")
    # The comprehensions are used only for their side effect: decompose()
    # removes each matched row from the tree; the resulting lists are discarded.
    [s.decompose() for s in soup.select("tr.over_header")]
    [s.decompose() for s in soup.select("tr.theader")]
    return soup


def read_line_score(soup):
    """Return the "team" and "total" columns of the table with id ``line_score``.

    The first column of the parsed table is renamed to "team" and the last
    column to "total"; every other column is dropped.
    """
    # NOTE: str(soup) hands read_html a literal HTML string; this is the call
    # that emits the "Passing literal html to 'read_html' is deprecated"
    # FutureWarning.
    line_score = pd.read_html(str(soup), attrs = {'id': 'line_score'})[0]
    cols = list(line_score.columns)
    cols[0] = "team"
    cols[-1] = "total"
    line_score.columns = cols
    
    line_score = line_score[["team", "total"]]
    
    return line_score

def read_stats(soup, team, stat):
    """Return the table with id ``box-{team}-game-{stat}`` as a numeric DataFrame.

    The table's first column is used as the index.
    """
    # NOTE: str(soup) hands read_html a literal HTML string; this is the call
    # that emits the "Passing literal html to 'read_html' is deprecated"
    # FutureWarning.
    df = pd.read_html(str(soup), attrs={"id": f"box-{team}-game-{stat}"}, index_col=0)[0]
    # errors="coerce" turns any cell that cannot be parsed as a number into NaN.
    df = df.apply(pd.to_numeric, errors="coerce")
    return df
like image 332
Cayp Avatar asked Dec 17 '25 22:12

Cayp


1 Answer

Instead of:

line_score = pd.read_html(str(soup), attrs = {'id': 'line_score'})[0]

Use io.StringIO:

from io import StringIO


def parse_html(box_scores):
    """Read the HTML file at ``box_scores`` and return it as a BeautifulSoup tree.

    Rows matching the CSS selectors ``tr.over_header`` and ``tr.theader`` are
    removed from the tree before it is returned.

    Args:
        box_scores: Path of the HTML file to read.

    Returns:
        The parsed ``BeautifulSoup`` object with the header rows stripped.
    """
    with open(box_scores) as f:
        html = f.read()

    soup = BeautifulSoup(html, features="lxml")

    # Plain loops instead of list comprehensions: decompose() is called purely
    # for its side effect (removing the row), so no result list is needed.
    for selector in ("tr.over_header", "tr.theader"):
        for row in soup.select(selector):
            row.decompose()

    return soup


def read_line_score(soup):
    """Extract the team / total columns from the ``line_score`` table.

    The markup is wrapped in ``StringIO`` so ``read_html`` receives a
    file-like object rather than a literal HTML string. The first column of
    the parsed table is relabelled "team" and the last one "total".
    """
    markup = StringIO(str(soup))
    tables = pd.read_html(markup, attrs={"id": "line_score"})
    scores = tables[0]

    # Relabel only the outermost columns; the ones in between keep their names.
    labels = list(scores.columns)
    labels[0], labels[-1] = "team", "total"
    scores.columns = labels

    return scores[["team", "total"]]

def read_stats(soup, team, stat):
    """Load the ``box-{team}-game-{stat}`` table as an all-numeric DataFrame.

    The markup is wrapped in ``StringIO`` so ``read_html`` receives a
    file-like object rather than a literal HTML string. The table's first
    column becomes the index; cells that are not numeric end up as NaN.
    """
    table_id = f"box-{team}-game-{stat}"
    markup = StringIO(str(soup))
    tables = pd.read_html(markup, attrs={"id": table_id}, index_col=0)
    return tables[0].apply(pd.to_numeric, errors="coerce")
like image 160
Andrej Kesely Avatar answered Dec 20 '25 11:12

Andrej Kesely



Donate For Us

If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!