Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to automate parameters passed into pandas.read_sql?

Tags:

python

sql

pandas

I am trying to create a methodology for passing parameters automatically through something like locals(), similarly to how f-strings work.

How it currently works

import pandas as pd

def my_func(conn, string_id, date, integer_ids):
    sql = f"""    
    select * from TABLE a
    where STRING_ID = '{string_id}'
    and DATE = {date}
    and INTEGER_ID in ({','.join(map(str, integer_ids))})"""
    df = pd.read_sql(sql, conn)
    return df

However, this approach means I cannot copy-paste the SQL into SQL developer or similar, and run it from there. So I would like an approach that makes use of parameters instead.

There seems to be two problems with that

  1. Parameters must be literals, so its not possible to pass along lists
  2. I need to create a dictionary manually, and cannot simply pass something like locals()

How I would like it to work would be something like the example below (which obviously doesn't work)

import pandas as pd

def my_func(conn, string_id, date, integer_ids):
    sql = """    
    select * from TABLE
    where STRING_ID = :string_id
    and DATE = :date
    and INTEGER_ID in :integer_ids"""
    df = pd.read_sql(sql, conn, params=locals())
    return df

EDIT: Thanks to perl, I now have a working solution to my problem

def read_sql(sql, conn, params):
    # Finds all words following a ":" sign in the sql
    for p in re.findall(':(\w+)', sql):
        if isinstance(params.get(p), (tuple, list)):
            ext_params = {f'{p}_{i:03d}': p_i for i, p_i in enumerate(params.get(p))}
            sql = sql.replace(f':{p}', f"(:{', :'.join(ext_params)})")
            params.update(ext_params)

    sql_text = sqlalchemy.text(sql)
    return pd.read_sql(sql_text, conn, params=params)


def my_func(conn, string_id, date, integer_ids):
    sql = """    
    select * from TABLE
    where STRING_ID = :string_id
    and DATE = :date
    and INTEGER_ID in :integer_ids"""
    df = read_sql(sql, conn, locals())
    return df

EDIT2: For anyone finding this question, I have since then extended the solution a bit to cover issues where lists longer than 1000 elements are passed

def generate_sql(sql: str, params: dict = None, param_key: str = ':') -> List[Tuple[sqlalchemy.text, dict]]:
    if params is None:
        params = dict()
    max_sql_params = 1000

    out = []
    # Finds all words following a ":" sign in the query
    for p in set(re.findall(f"{param_key}(\w+)", sql)):
        if isinstance(params.get(p), (tuple, list, np.ndarray)):
            # Recursively call function for variables with more than 1000 elements
            if len(params[p]) > max_sql_params:
                new_params = params.copy()  # NB: Shallow copy sufficient as param keys are tuples, lists or arrays
                new_params[p] = params[p][max_sql_params:]
                out.extend(generate_sql(sql=sql, params=new_params, param_key=param_key))
            extra_params = {f"{p}_{i:03d}": p_i for i, p_i in enumerate(params[p][:max_sql_params])}
            sql = sql.replace(f":{p}", f"(:{', :'.join(extra_params)})")
            params.update(extra_params)

    sql_text = sqlalchemy.text(sql)
    out.append((sql_text, params))
    return out


def read_sql(sql: str, conn: sqlalchemy.engine, params: dict = None) -> pd.DataFrame:
    sql_tuples = generate_sql(sql=sql, params=params)
    df = pd.concat(pd.read_sql(sql=s, con=conn, params=p) for s, p in sql_tuples)

    return df
like image 953
oskros Avatar asked Oct 24 '25 05:10

oskros


2 Answers

You can use parametrized queries by wrapping the query in sqlalchemy.text and converting lists to tuples. For example:

def my_func(conn, min_number, letters):
    # convert lists to tuples
    letters = tuple(letters)
    
    # wrap sql in sqlalchemy.text
    sql = sqlalchemy.text("""    
        SELECT *
        FROM letters
        WHERE
            number >= :min_number AND
            letter in :letters""")
    
    # read and return the resulting dataframe
    df = pd.read_sql(sql, conn, params=locals())
    return df

my_func(conn, 10, ['a', 'b', 'c', 'x', 'y', 'z'])

Output:

  letter  number
0      x      23
1      y      24
2      z      25

For completeness of the example, the following was used as a test table:

df = pd.DataFrame({
    'letter': list(string.ascii_lowercase),
    'number': range(len(string.ascii_lowercase))})
df.to_sql('letters', conn, index=False)

Update: Here's a possible workaround for Oracle to make it work with lists:

def get_query(sql, **kwargs):
    for k, v in kwargs.items():
        vs = "','".join(v)
        sql = sql.replace(f':{k}', f"('{vs}')")
    return sql

def my_func(conn, min_number, letters):
    sql_template = """    
        SELECT *
        FROM letters
        WHERE
            number >= :min_number AND
            letter in :letters
    """
    # pass list variables to `get_query` function as named parameters
    # to get parameters replaced with ('value1', 'value2', ..., 'valueN')
    sql = sqlalchemy.text(
        get_query(sql_template, letters=letters))
    
    df = pd.read_sql(sql, conn, params=locals())
    return df

my_func(conn, 10, ['a', 'b', 'c', 'x', 'y', 'z'])

Update 2: Here's the get_query function that works with both strings and numbers (enclosing in quotes strings, but not numbers):

def get_query(sql, **kwargs):
    # enclose in quotes strings, but not numbers
    def q(x):
        q = '' if isinstance(x, (int, float)) else "'"
        return f'{q}{x}{q}'
    
    # replace with values
    for k, v in kwargs.items():
        sql = sql.replace(f':{k}', f"({','.join([q(x) for x in v])})")

    return sql

For example:

sql = """    
SELECT *
FROM letters
WHERE
    number in :numbers AND
    letters in :letters
"""

get_query(sql,
          numbers=[1, 2, 3],
          letters=['A', 'B', 'C'])

Output:

SELECT *
FROM letters
WHERE
    number in (1,2,3) AND
    letters in ('A','B','C')
like image 131
perl Avatar answered Oct 25 '25 18:10

perl


why not this :

import pandas as pd

def my_func(conn, string_id, date, integer_ids):
    sql = """    
    select * from RISK
    where STRING_ID = %s
    and DATE = %s
    and INTEGER_ID in %s"""
    df = pd.read_sql(sql, conn, (string_id, date,integer_ids))
    return df
like image 45
eshirvana Avatar answered Oct 25 '25 18:10

eshirvana