I know that with a prepared statement we can set the column values. What I want here is different: I already have a list of queries written to execute on the same table but with different column values, e.g.:
select * from tableName as t1 where t1.tableColumnId=4 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
select * from tableName as t1 where t1.tableColumnId=6 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
As you can see, both queries are almost the same except for the tableColumnId
value. I want to save this in a collection as
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
So that I won't have duplicate queries (where values are not considered).
How can I do this?
What Does Parsing a Query Mean? A SQL statement is comprised of various inputs, i.e. different tables, functions, expressions. Thus it is possible that there are multiple ways to execute one query. Of course, the query must run in the most optimal way in order to execute in the shortest possible time.
Every query must be parsed at least once. The parsing of a query is performed within the database using the Optimizer component.
If it’s not possible to find a reusable cursor or if the query has never been executed before, query optimization is required. This is called Hard Parsing. Hard parsing means that either the cursor was not found in the library cache or it was found but was invalidated for some reason.
A SQL statement is comprised of various inputs, i.e. different tables, functions, expressions. Thus it is possible that there are multiple ways to execute one query. Of course, the query must run in the most optimal way in order to execute in the shortest possible time.
Find the literal values (4, 6, 'test') in your queries, build a (flat) syntax tree for each query, and compare the trees to one another to identify those literal values that may differ from one query to another.
UPDATE
To parse the SQL, you could use a parser generator like ANTLR or JavaCC. ANTLR and JavaCC grammars exist for SQL, and you could start with one of them.
That said, I think this approach would be overkill in this instance; I would rather use the third.
UPDATE 2: (third method)
To locate literal strings and numbers, you can use a regexp:
/**
 * Matches one SQL literal together with the single character in front of it.
 *
 * <ul>
 *   <li>Group 1: the character preceding the literal. It must not be a letter,
 *       a digit or an underscore, so digits that belong to an identifier
 *       (e.g. {@code t1}, {@code tableName2}, {@code col_1}) are not treated
 *       as literals.</li>
 *   <li>Group 2: the literal itself, either a number ({@code 4}, {@code 2.5},
 *       {@code .5}, {@code 1e5}, {@code 2.5E-3}) or a single-quoted string in
 *       which a quote is escaped by doubling it ({@code 'it''s'}).</li>
 * </ul>
 *
 * The exponent is optional, and so is its sign. No CASE_INSENSITIVE flag is
 * needed because every letter class already lists both cases.
 */
private static final Pattern CONST_PATTERN = Pattern.compile(
        "([^0-9a-zA-Z_])"
        + "((?:[0-9]+(?:\\.[0-9]*)?|[0-9]*\\.[0-9]+)(?:[Ee][+-]?[0-9]+)?"
        + "|(?:'[^']*')+)");
You can parse a query while generating the following structure:
/**
 * A SQL text in which literals have been replaced by {@code ?} placeholders,
 * paired with the literals that were taken out of it.
 */
private static class ParameterizedQuery {
    /** The SQL text containing the placeholders. */
    final String sql;
    /** The extracted literals; a private copy of the array given to the constructor. */
    final Parameter[] params;

    /**
     * @param sql    the SQL text with placeholders
     * @param params the extracted literals; the array is cloned, so later
     *               changes to the caller's array are not seen here
     */
    ParameterizedQuery(final String sql, final Parameter[] params) {
        final Parameter[] ownCopy = params.clone();
        this.params = ownCopy;
        this.sql = sql;
    }
}
/**
 * One literal removed from a query: where its placeholder sits in the
 * rewritten SQL text and what the literal's original source text was.
 */
private static class Parameter {
    /** Index of the placeholder character inside the rewritten SQL text. */
    final int position;
    /** The literal exactly as it appeared in the query (strings keep their quotes). */
    final String value;

    /**
     * @param position index of the placeholder in the rewritten SQL text
     * @param value    source text of the literal
     */
    Parameter(final int position, final String value) {
        this.value = value;
        this.position = position;
    }
}
The resulting sql query is the input query with all the literals replaced with question marks. The parsing is done as follows:
/**
 * Rewrites {@code query} so that every literal found by {@code CONST_PATTERN}
 * becomes a {@code ?} placeholder, and records each removed literal together
 * with the index of its placeholder in the rewritten text.
 *
 * @param query the SQL text to scan
 * @return the rewritten SQL text and the literals that were extracted from it
 */
private static ParameterizedQuery parse(String query) {
    final StringBuilder sql = new StringBuilder();
    final List<Parameter> found = new ArrayList<>();
    final Matcher m = CONST_PATTERN.matcher(query);
    int copiedUpTo = 0;
    while (m.find()) {
        // Copy the untouched text, then the single character captured in front of the literal.
        sql.append(query, copiedUpTo, m.start());
        sql.append(m.group(1));
        // The placeholder is about to be written at the current end of the buffer.
        found.add(new Parameter(sql.length(), m.group(2)));
        sql.append("?");
        copiedUpTo = m.end();
    }
    // Whatever follows the last literal is copied verbatim.
    sql.append(query, copiedUpTo, query.length());
    final Parameter[] params = found.toArray(new Parameter[found.size()]);
    return new ParameterizedQuery(sql.toString(), params);
}
Now, if you have a list of queries, and you want to keep as parameters only those literals that are not equal in all the input queries, you parse all your queries, producing an array of ParameterizedQuery, and then simplify that array:
/**
 * Keeps as placeholders only the literals whose value changes between
 * neighbouring queries; literals that are identical everywhere are written
 * back into the SQL text.
 *
 * @param queries parsed queries that are expected to share the same SQL text
 * @return {@code queries} itself when it is empty or when every literal
 *         varies; otherwise a new array in which all entries share one
 *         reduced SQL text
 * @throws RuntimeException if two neighbouring queries differ in SQL text or
 *         in number of parameters
 */
private static ParameterizedQuery[] simplify(ParameterizedQuery[] queries) {
    if (queries.length == 0) {
        return queries;
    }
    final ParameterizedQuery first = queries[0];
    // varies[p] becomes true once parameter p differs between two neighbours.
    final boolean[] varies = new boolean[first.params.length];
    for (int q = 1; q < queries.length; ++q) {
        final ParameterizedQuery before = queries[q - 1];
        final ParameterizedQuery current = queries[q];
        if (!current.sql.equals(before.sql)) {
            throw new RuntimeException(
                    "Queries are too different: [" + before.sql
                    + "] and [" + current.sql + "]");
        }
        if (current.params.length != before.params.length) {
            throw new RuntimeException(
                    "Different number of parameters: ["
                    + before.params.length
                    + "] and [" + current.params.length + "]");
        }
        for (int p = 0; p < varies.length; ++p) {
            varies[p] |= !current.params[p].value.equals(before.params[p].value);
        }
    }
    if (and(varies)) {
        // Every literal varies, so there is nothing to write back.
        return queries;
    }
    // The first query provides the reduced SQL text shared by all results.
    final ParameterizedQuery template = expandQuery(first.sql, first.params, varies);
    final ParameterizedQuery[] simplified = new ParameterizedQuery[queries.length];
    simplified[0] = template;
    for (int q = 1; q < queries.length; ++q) {
        final Parameter[] remaining = keep(queries[q].params, template.params, varies);
        simplified[q] = new ParameterizedQuery(template.sql, remaining);
    }
    return simplified;
}
/**
 * Logical AND over all elements.
 *
 * @param arr the flags to combine
 * @return {@code true} when every element is {@code true} (including when the
 *         array is empty), {@code false} as soon as one element is {@code false}
 */
private static boolean and(boolean[] arr) {
    int idx = 0;
    // Walk forward while the flags are set; stopping early means a false was found.
    while (idx < arr.length && arr[idx]) {
        ++idx;
    }
    return idx == arr.length;
}
/**
 * Builds a new query from {@code query} in which the placeholders flagged in
 * {@code diff} are kept and all other placeholders are replaced by the
 * literal text of the corresponding parameter.
 *
 * @param query  SQL text containing one placeholder per entry of {@code params}
 * @param params the parameters of {@code query}, in order of appearance
 * @param diff   {@code diff[i]} is {@code true} when parameter {@code i} must
 *               stay a placeholder
 * @return the rewritten SQL text with the kept parameters, whose positions
 *         refer to the rewritten text
 */
private static ParameterizedQuery expandQuery(String query,
        Parameter[] params, boolean[] diff) {
    int kept = 0;
    for (int i = 0; i < diff.length; ++i) {
        kept += diff[i] ? 1 : 0;
    }
    final Parameter[] survivors = new Parameter[kept];
    final StringBuilder out = new StringBuilder();
    int next = 0;
    int cursor = 0;
    for (int i = 0; i < diff.length; ++i) {
        final Parameter p = params[i];
        // Copy everything up to, but not including, this parameter's placeholder.
        out.append(query, cursor, p.position);
        if (diff[i]) {
            // Keep the placeholder: it is copied on the next append, so its new
            // index is the current buffer length.
            survivors[next++] = new Parameter(out.length(), p.value);
            cursor = p.position;
        } else {
            // Write the literal back and step over the placeholder character.
            out.append(p.value);
            cursor = p.position + 1;
        }
    }
    out.append(query, cursor, query.length());
    return new ParameterizedQuery(out.toString(), survivors);
}
/**
 * Selects the parameters flagged in {@code diff} and gives each one the
 * position of the matching entry in {@code ref}.
 *
 * @param params the full parameter list of one query
 * @param ref    the kept parameters of the reference query; supplies positions
 * @param diff   {@code diff[i]} is {@code true} when {@code params[i]} is kept
 * @return an array of length {@code ref.length} holding the kept parameters
 */
private static Parameter[] keep(Parameter[] params, Parameter[] ref,
        boolean[] diff) {
    final Parameter[] kept = new Parameter[ref.length];
    int slot = 0;
    for (int i = 0; i < params.length; ++i) {
        if (!diff[i]) {
            continue;
        }
        final Parameter template = ref[slot];
        kept[slot] = new Parameter(template.position, params[i].value);
        slot++;
    }
    return kept;
}
Here's the program that resolves your example:
/**
 * Demo: turns a list of near-identical SQL queries into one parameterized
 * query text plus, per input query, the literal values that differ.
 *
 * <p>Each query is first scanned with a regular expression that replaces
 * every numeric or single-quoted string literal with {@code ?}. The results
 * are then compared, and literals that are equal in all queries are written
 * back into the SQL text so that only the varying ones stay placeholders.
 */
public class Main {
    private static final String[] QUERIES = {
        "select * from tableName as t1 where t1.tableColumnId=4 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId",
        "select * from tableName as t1 where t1.tableColumnId=6 and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId",
    };

    /**
     * Matches one SQL literal together with the single character in front of it.
     * Group 1 is that preceding character; it must not be a letter, digit or
     * underscore, so digits inside identifiers ({@code t1}, {@code col_1}) are
     * left alone. Group 2 is the literal: a number with an optional exponent
     * whose sign is optional ({@code 4}, {@code .5}, {@code 1e5},
     * {@code 2.5E-3}), or a single-quoted string where a quote is escaped by
     * doubling it.
     */
    private static final Pattern CONST_PATTERN = Pattern.compile(
            "([^0-9a-zA-Z_])"
            + "((?:[0-9]+(?:\\.[0-9]*)?|[0-9]*\\.[0-9]+)(?:[Ee][+-]?[0-9]+)?"
            + "|(?:'[^']*')+)");

    /** SQL text with {@code ?} placeholders plus the literals removed from it. */
    private static final class ParameterizedQuery {
        final String sql;
        final Parameter[] params;

        ParameterizedQuery(String sql, Parameter[] params) {
            this.sql = sql;
            // Defensive copy so the caller's array cannot change this object.
            this.params = params.clone();
        }
    }

    /** One removed literal: index of its placeholder in the SQL text and its source text. */
    private static final class Parameter {
        final int position;
        final String value;

        Parameter(int position, String value) {
            this.position = position;
            this.value = value;
        }
    }

    /**
     * Parses the sample queries, prints them fully parameterized, then prints
     * them again after {@link #simplify} has written the constant literals back.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ParameterizedQuery[] queries = new ParameterizedQuery[QUERIES.length];
        for (int i = 0; i < QUERIES.length; ++i) {
            queries[i] = parse(QUERIES[i]);
        }
        print(queries);
        print(simplify(queries));
    }

    /** Prints each query's SQL text followed by its numbered parameter values. */
    private static void print(ParameterizedQuery[] queries) {
        for (ParameterizedQuery cur : queries) {
            System.out.println(cur.sql);
            int i = 0;
            for (Parameter parm : cur.params) {
                System.out.println(" " + (++i) + ": " + parm.value);
            }
        }
    }

    /**
     * Replaces every literal in {@code query} with {@code ?} and records the
     * literal together with the index of its placeholder.
     *
     * @param query the SQL text to scan
     * @return the rewritten text and the extracted literals, in order of appearance
     */
    private static ParameterizedQuery parse(String query) {
        List<Parameter> parms = new ArrayList<>();
        Matcher matcher = CONST_PATTERN.matcher(query);
        int start = 0;
        StringBuilder buf = new StringBuilder();
        while (matcher.find()) {
            // Untouched text, then the character that preceded the literal.
            buf.append(query, start, matcher.start())
               .append(matcher.group(1));
            // The placeholder goes at the current end of the buffer.
            parms.add(new Parameter(buf.length(), matcher.group(2)));
            buf.append('?');
            start = matcher.end();
        }
        buf.append(query, start, query.length());
        return new ParameterizedQuery(buf.toString(), parms.toArray(new Parameter[0]));
    }

    /**
     * Keeps as placeholders only the literals whose value differs between
     * neighbouring queries; the others are written back into the SQL text.
     *
     * @param queries parsed queries expected to share the same SQL text
     * @return {@code queries} itself when it is empty or every literal varies;
     *         otherwise a new array whose entries share one reduced SQL text
     * @throws IllegalArgumentException if two neighbouring queries differ in
     *         SQL text or in number of parameters
     */
    private static ParameterizedQuery[] simplify(ParameterizedQuery[] queries) {
        if (queries.length == 0) {
            return queries;
        }
        boolean[] diff = new boolean[queries[0].params.length];
        for (int q = 1; q < queries.length; ++q) {
            ParameterizedQuery prev = queries[q - 1];
            ParameterizedQuery cur = queries[q];
            if (!cur.sql.equals(prev.sql)) {
                throw new IllegalArgumentException(
                        "Queries are too different: [" + prev.sql
                        + "] and [" + cur.sql + "]");
            }
            if (cur.params.length != prev.params.length) {
                throw new IllegalArgumentException(
                        "Different number of parameters: ["
                        + prev.params.length
                        + "] and [" + cur.params.length + "]");
            }
            for (int i = 0; i < diff.length; ++i) {
                if (!cur.params[i].value.equals(prev.params[i].value)) {
                    diff[i] = true;
                }
            }
        }
        if (and(diff)) {
            // Every literal varies: nothing can be written back.
            return queries;
        }
        ParameterizedQuery[] result = new ParameterizedQuery[queries.length];
        // The first query provides the reduced SQL text shared by all results.
        result[0] = expandQuery(queries[0].sql, queries[0].params, diff);
        for (int i = 1; i < queries.length; ++i) {
            result[i] = new ParameterizedQuery(result[0].sql,
                    keep(queries[i].params, result[0].params, diff));
        }
        return result;
    }

    /** Returns {@code true} when every element is {@code true} (also for an empty array). */
    private static boolean and(boolean[] arr) {
        for (boolean b : arr) {
            if (!b) {
                return false;
            }
        }
        return true;
    }

    /**
     * Rebuilds {@code query}, keeping the placeholders flagged in {@code diff}
     * and replacing the others with their literal text.
     *
     * @param query  SQL text with one placeholder per entry of {@code params}
     * @param params the parameters of {@code query}, in order of appearance
     * @param diff   {@code diff[i]} is {@code true} when parameter {@code i} stays a placeholder
     * @return the rewritten text and the kept parameters with updated positions
     */
    private static ParameterizedQuery expandQuery(String query,
            Parameter[] params, boolean[] diff) {
        int count = 0;
        for (boolean b : diff) {
            if (b) {
                ++count;
            }
        }
        Parameter[] result = new Parameter[count];
        int r = 0;
        int start = 0;
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < diff.length; ++i) {
            Parameter parm = params[i];
            buf.append(query, start, parm.position);
            if (diff[i]) {
                // The placeholder is copied by the next append, so its new
                // index is the current buffer length.
                result[r++] = new Parameter(buf.length(), parm.value);
                start = parm.position;
            } else {
                // Write the literal back and skip the placeholder character.
                buf.append(parm.value);
                start = parm.position + 1;
            }
        }
        buf.append(query, start, query.length());
        return new ParameterizedQuery(buf.toString(), result);
    }

    /**
     * Selects the parameters flagged in {@code diff}, giving each the position
     * of the matching entry in {@code ref}.
     *
     * @param params the full parameter list of one query
     * @param ref    kept parameters of the reference query; supplies positions
     * @param diff   {@code diff[i]} is {@code true} when {@code params[i]} is kept
     * @return an array of length {@code ref.length} with the kept parameters
     */
    private static Parameter[] keep(Parameter[] params, Parameter[] ref,
            boolean[] diff) {
        Parameter[] result = new Parameter[ref.length];
        int j = 0;
        for (int i = 0; i < params.length; ++i) {
            if (diff[i]) {
                result[j] = new Parameter(ref[j].position, params[i].value);
                ++j;
            }
        }
        return result;
    }
}
The output is:
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName=? inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
1: 4
2: 'test'
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName=? inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
1: 6
2: 'test'
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
1: 4
select * from tableName as t1 where t1.tableColumnId=? and t1.tableColumnName='test' inner join tableName2 as t2 on t1.tableColumnId=t2.tableColumnId
1: 6
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With