Percona MySQL 5.7
table scheeme:
CREATE TABLE Developer.Rate (
ID bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
TIME datetime NOT NULL,
BASE varchar(3) NOT NULL,
QUOTE varchar(3) NOT NULL,
BID double NOT NULL,
ASK double NOT NULL,
PRIMARY KEY (ID),
INDEX IDX_TIME (TIME),
UNIQUE INDEX IDX_UK (BASE, QUOTE, TIME)
)
ENGINE = INNODB
ROW_FORMAT = COMPRESSED;
I try to make request for latests data before selected period. The optimazer use no-complete unique key, only 2 columns of 3.
If I do request in common way:
EXPLAIN FORMAT=JSON
SELECT
BID
FROM
Rate
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY
`TIME` DESC
LIMIT 1
;
"Explain" shows that only 2 first columns of index are used: BASE, QUOTE
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "10231052.40"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate",
"access_type": "ref",
"possible_keys": [
"IDX_UK",
"IDX_TIME"
],
"key": "IDX_UK",
"used_key_parts": [
"BASE",
"QUOTE"
],
"key_length": "22",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 45966462,
"rows_produced_per_join": 22983231,
"filtered": "50.00",
"cost_info": {
"read_cost": "1037760.00",
"eval_cost": "4596646.20",
"prefix_cost": "10231052.40",
"data_read_per_join": "1G"
},
"used_columns": [
"ID",
"TIME",
"BASE",
"QUOTE",
"BID"
],
"attached_condition": "((`Developer`.`Rate`.`BASE` <=> 'EUR') and (`Developer`.`Rate`.`QUOTE` <=> 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))"
}
}
}
}
But if you force the optimizer to use IDX_UK, MySQL uses all 3 columns in the request:
EXPLAIN FORMAT=JSON
SELECT
BID
FROM
Rate FORCE INDEX(IDX_UK)
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY
`TIME` DESC
LIMIT 1
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "10231052.40"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate",
"access_type": "range",
"possible_keys": [
"IDX_UK"
],
"key": "IDX_UK",
"used_key_parts": [
"BASE",
"QUOTE",
"TIME"
],
"key_length": "27",
"rows_examined_per_scan": 45966462,
"rows_produced_per_join": 15320621,
"filtered": "100.00",
"index_condition": "((`Developer`.`Rate`.`BASE` = 'EUR') and (`Developer`.`Rate`.`QUOTE` = 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))",
"cost_info": {
"read_cost": "1037760.00",
"eval_cost": "3064124.31",
"prefix_cost": "10231052.40",
"data_read_per_join": "818M"
},
"used_columns": [
"ID",
"TIME",
"BASE",
"QUOTE",
"BID"
]
}
}
}
}
Why the optimizer don't use all 3 columns without explicit declaration of index?
Added:
A'm I understanding right, I should to use request like this?
Reuest example:
EXPLAIN FORMAT=JSON
SELECT
BID
FROM
Rate
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY
BASE DESC, QUOTE DESC, TIME DESC
LIMIT 1
If I understand it right, the output of Explain vouldn't be better. There are still only 2 columns are used without TIME
Explain Output
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "10384642.20"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate",
"access_type": "ref",
"possible_keys": [
"IDX_UK",
"IDX_TIME"
],
"key": "IDX_UK",
"used_key_parts": [
"BASE",
"QUOTE"
],
"key_length": "22",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 46734411,
"rows_produced_per_join": 23367205,
"filtered": "50.00",
"index_condition": "((Developer.Rate.BASE <=> 'EUR') and (Developer.Rate.QUOTE <=> 'USD') and (Developer.Rate.TIME <= ((now() - interval 1 month))))",
"cost_info": {
"read_cost": "1037760.00",
"eval_cost": "4673441.10",
"prefix_cost": "10384642.20",
"data_read_per_join": "1G"
},
"used_columns": [
"ID",
"TIME",
"BASE",
"QUOTE",
"BID"
]
}
}
}
}
Added 2:
I made these 4 requests:
— 1 —
<code>FLUSH STATUS;
SELECT
BID
FROM
Rate
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';</code>
— 2 —
<code>FLUSH STATUS;
SELECT
BID
FROM
Rate FORCE INDEX (IDX_UK)
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';
</code>
— 3 —
<code>FLUSH STATUS;
SELECT
BID
FROM
Rate
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY
`TIME` DESC
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';</code>
— 4 —
<code>
FLUSH STATUS;
SELECT
BID
FROM
Rate FORCE INDEX (IDX_UK)
WHERE
BASE = 'EUR'
AND QUOTE = 'USD'
AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY
`TIME` DESC
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';</code>
The output of session_status is the same in all requests except request 3. In output of request 3: Handler_read_prev = 486474; In output of all ather requests: Handler_read_prev = 0;

Added 3:
I made a copy of the table, removed Id field, promoted UNIQUE key as PRIMARY.
The scheme:
CREATE TABLE Developer.Rate2 (
TIME datetime NOT NULL,
BASE varchar(3) NOT NULL,
QUOTE varchar(3) NOT NULL,
BID double NOT NULL,
ASK double NOT NULL,
PRIMARY KEY (BASE, QUOTE, TIME),
INDEX IDX_BID_ASK (BID, ASK)
)
ENGINE = INNODB
AVG_ROW_LENGTH = 26
CHARACTER SET utf8
COLLATE utf8_general_ci
ROW_FORMAT = COMPRESSED;
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "9673452.20"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate2",
"access_type": "range",
"possible_keys": [
"PRIMARY"
],
"key": "PRIMARY",
"used_key_parts": [
"BASE",
"QUOTE",
"TIME"
],
"key_length": "27",
"rows_examined_per_scan": 48023345,
"rows_produced_per_join": 16006180,
"filtered": "100.00",
"cost_info": {
"read_cost": "68783.20",
"eval_cost": "3201236.12",
"prefix_cost": "9673452.20",
"data_read_per_join": "732M"
},
"used_columns": [
"TIME",
"BASE",
"QUOTE",
"BID"
],
"attached_condition": "((`Developer`.`Rate2`.`BASE` = 'EUR') and (`Developer`.`Rate2`.`QUOTE` = 'USD') and (`Developer`.`Rate2`.`TIME` <= <cache>((now() - interval 1 month))))"
}
}
}
}
Now the request really works and Explain shows all 3 columns are used. This variant works.
Get rid of ID, it is of no use. Promote your UNIQUE key to be PRIMARY. Now, magically, the query will be faster, and the Question you posed will become moot. (You may also need the DESC trick that lorraine suggested.)
Here's another technique to compare performance:
FLUSH STATUS;
SELECT ...;
SHOW SESSION STATUS LIKE 'Handler%';
I would be interested to see the output from the SHOW for with and without the DESC trick. And with/without the FORCE INDEX you alluded to.
Why faster? Your query was using a secondary index, but it needed bid, which was not 'covered' by the index. To get bid, the PRIMARY KEY needed to be drilled down in the 'data'. By changing it so that the PK is used, this extra drill-down is obviated.
The behavior you describe (ref access instead of range access over more columns) reminds me of Bug#81341 and Bug#87613. These bugs were fixed in MySQL 5.7.17 and 5.7.21, respectively. Which version are you using?
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With