Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions mysql-test/suite/innodb/r/index_lock_upgrade.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#
# MDEV-38814: btr_cur_need_opposite_intention() should not
# trigger index lock upgrade on the root page
#
# Test with simple table structure:
# Single INT primary key
# DATETIME column dt that starts NULL and gets updated to a
# distinct timestamp per row
#
# The NULL-to-DATETIME growth makes btr_cur_optimistic_update()
# return DB_UNDERFLOW (TODO: Why? DB_OVERFLOW was expected),
# falling through to the pessimistic path (BTR_MODIFY_TREE).
CREATE TABLE t1 (
id INT NOT NULL,
dt DATETIME NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB;
INSERT INTO t1 (id, dt) SELECT seq, NULL FROM seq_1_to_1000;
# Rows inserted with NULL DATETIME
SELECT COUNT(*) AS rows_inserted FROM t1;
rows_inserted
1000
SELECT COUNT(DISTINCT dt) AS distinct_timestamps FROM t1;
distinct_timestamps
0
# Index lock upgrades during inserts (no reduction w.r.t. root page checks)
SELECT @u01 AS index_lock_upgrades_insert;
index_lock_upgrades_insert
5
SELECT @r01 AS need_opposite_intention_root_insert;
need_opposite_intention_root_insert
5
# Now update all rows (batch update)
# This changes DATETIME from NULL to a distinct timestamp per row,
# triggering the pessimistic path due to record size change.
UPDATE t1 JOIN seq_1_to_1000 s ON t1.id = s.seq
SET t1.dt = '2025-01-01' + INTERVAL s.seq SECOND;
# Rows updated
SELECT COUNT(DISTINCT dt) AS distinct_timestamps FROM t1;
distinct_timestamps
1000
# Index lock upgrades during updates (with reduction w.r.t. root page checks)
SELECT @u12 AS index_lock_upgrades_update;
index_lock_upgrades_update
120
SELECT @r12 AS need_opposite_intention_root_update;
need_opposite_intention_root_update
414
DROP TABLE t1;
3 changes: 2 additions & 1 deletion mysql-test/suite/innodb/r/innodb_status_variables.result
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ WHERE variable_name LIKE 'INNODB_%'
AND variable_name NOT IN
('INNODB_ADAPTIVE_HASH_HASH_SEARCHES','INNODB_ADAPTIVE_HASH_NON_HASH_SEARCHES',
'INNODB_MEM_ADAPTIVE_HASH',
'INNODB_BUFFERED_AIO_SUBMITTED','INNODB_BUFFER_POOL_PAGES_LATCHED');
'INNODB_BUFFERED_AIO_SUBMITTED','INNODB_BUFFER_POOL_PAGES_LATCHED',
'INNODB_BTR_INDEX_LOCK_UPGRADES','INNODB_BTR_NEED_OPPOSITE_INTENTION_ROOT');
variable_name
INNODB_ASYNC_READS_PENDING
INNODB_ASYNC_READS_TASKS_RUNNING
Expand Down
74 changes: 74 additions & 0 deletions mysql-test/suite/innodb/t/index_lock_upgrade.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
--source include/have_innodb.inc
--source include/have_innodb_4k.inc
--source include/have_sequence.inc
--source include/have_debug.inc

--echo #
--echo # MDEV-38814: btr_cur_need_opposite_intention() should not
--echo # trigger index lock upgrade on the root page
--echo #

--echo # Test with simple table structure:
--echo # Single INT primary key
--echo # DATETIME column dt that starts NULL and gets updated to a
--echo # distinct timestamp per row
--echo #
--echo # The NULL-to-DATETIME growth makes btr_cur_optimistic_update()
--echo # return DB_UNDERFLOW (TODO: Why? DB_OVERFLOW was expected),
--echo # falling through to the pessimistic path (BTR_MODIFY_TREE).
CREATE TABLE t1 (
id INT NOT NULL,
dt DATETIME NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB;

let $u0 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Innodb_btr_index_lock_upgrades'`;
let $r0 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Innodb_btr_need_opposite_intention_root'`;

INSERT INTO t1 (id, dt) SELECT seq, NULL FROM seq_1_to_1000;

--echo # Rows inserted with NULL DATETIME
SELECT COUNT(*) AS rows_inserted FROM t1;
SELECT COUNT(DISTINCT dt) AS distinct_timestamps FROM t1;

let $u1 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Innodb_btr_index_lock_upgrades'`;
let $r1 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Innodb_btr_need_opposite_intention_root'`;

--disable_query_log
eval SET @u01 = $u1 - $u0;
eval SET @r01 = $r1 - $r0;
--enable_query_log

--echo # Index lock upgrades during inserts (no reduction w.r.t. root page checks)
SELECT @u01 AS index_lock_upgrades_insert;
SELECT @r01 AS need_opposite_intention_root_insert;

--echo # Now update all rows (batch update)
--echo # This changes DATETIME from NULL to a distinct timestamp per row,
--echo # triggering the pessimistic path due to record size change.

UPDATE t1 JOIN seq_1_to_1000 s ON t1.id = s.seq
SET t1.dt = '2025-01-01' + INTERVAL s.seq SECOND;

--echo # Rows updated
SELECT COUNT(DISTINCT dt) AS distinct_timestamps FROM t1;

let $u2 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Innodb_btr_index_lock_upgrades'`;
let $r2 = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'Innodb_btr_need_opposite_intention_root'`;

--disable_query_log
eval SET @u12 = $u2 - $u1;
eval SET @r12 = $r2 - $r1;
--enable_query_log

--echo # Index lock upgrades during updates (with reduction w.r.t. root page checks)
SELECT @u12 AS index_lock_upgrades_update;
SELECT @r12 AS need_opposite_intention_root_update;

DROP TABLE t1;
3 changes: 2 additions & 1 deletion mysql-test/suite/innodb/t/innodb_status_variables.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ WHERE variable_name LIKE 'INNODB_%'
AND variable_name NOT IN
('INNODB_ADAPTIVE_HASH_HASH_SEARCHES','INNODB_ADAPTIVE_HASH_NON_HASH_SEARCHES',
'INNODB_MEM_ADAPTIVE_HASH',
'INNODB_BUFFERED_AIO_SUBMITTED','INNODB_BUFFER_POOL_PAGES_LATCHED');
'INNODB_BUFFERED_AIO_SUBMITTED','INNODB_BUFFER_POOL_PAGES_LATCHED',
'INNODB_BTR_INDEX_LOCK_UPGRADES','INNODB_BTR_NEED_OPPOSITE_INTENTION_ROOT');
59 changes: 46 additions & 13 deletions storage/innobase/btr/btr0cur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ ulint btr_cur_n_sea_old;
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
uint btr_cur_limit_optimistic_insert_debug;
/** Number of times index lock was upgraded from SX to X */
Atomic_counter<size_t> btr_cur_n_index_lock_upgrades;
/** Number of times btr_cur_need_opposite_intention() was called on the root */
Atomic_counter<size_t> btr_cur_n_need_opposite_intention_root;
#endif /* UNIV_DEBUG */

/** In the optimistic insert, if the insert does not fit, but this much space
Expand Down Expand Up @@ -777,22 +781,43 @@ btr_cur_will_modify_tree(
/** Detects whether the modifying record might need a opposite modification
to the intention.
@param bpage buffer pool page
@param is_clust whether this is a clustered index
@param index index tree
@param lock_intention lock intention for the tree operation
@param node_ptr_max_size the maximum size of a node pointer
@param compress_limit BTR_CUR_PAGE_COMPRESS_LIMIT(index)
@param rec record (current node_ptr)
@return true if tree modification is needed */
static bool btr_cur_need_opposite_intention(const buf_page_t &bpage,
bool is_clust,
const dict_index_t &index,
btr_intention_t lock_intention,
ulint node_ptr_max_size,
ulint compress_limit,
const rec_t *rec)
{
ut_ad(bpage.frame == page_align(rec));
/* Every check below detects whether a structural change at this
page (e.g. a page split inserting a node pointer into the parent,
a merge with a sibling removing one, or a boundary change updating
one) could cascade into its parent. Handling such a structural change
would also require latching sibling pages at this level, which cannot
be safely acquired under SX without risking deadlock with concurrent
readers; hence the caller must upgrade the index lock from SX to X.

The root has no parent and no siblings, so none of those cascades
apply and no sibling latches are needed.

A root page split (btr_root_raise_and_insert()) is a special case
that changes the tree height, but it never cascades upward into a
parent. See the comment and assertion in
btr_cur_search_to_nth_level() for why cascades cannot reach the
unlatched root under SX. */
if (bpage.id().page_no() == index.page)
{
ut_d(++btr_cur_n_need_opposite_intention_root);
return false;
}
if (UNIV_LIKELY_NULL(bpage.zip.data) &&
!page_zip_available(&bpage.zip, is_clust, node_ptr_max_size, 1))
!page_zip_available(&bpage.zip, index.is_clust(), node_ptr_max_size, 1))
return true;
const page_t *const page= bpage.frame;
if (lock_intention != BTR_INTENTION_INSERT)
Expand Down Expand Up @@ -1416,7 +1441,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
ut_ad(low_match != uint16_t(~0U) || mode != PAGE_CUR_LE);

if (latch_mode == BTR_MODIFY_TREE &&
btr_cur_need_opposite_intention(block->page, index()->is_clust(),
btr_cur_need_opposite_intention(block->page, *index(),
lock_intention,
node_ptr_max_size, compress_limit,
page_cur.rec))
Expand Down Expand Up @@ -1449,7 +1474,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
default:
break;
case BTR_MODIFY_TREE:
if (btr_cur_need_opposite_intention(block->page, index()->is_clust(),
if (btr_cur_need_opposite_intention(block->page, *index(),
lock_intention,
node_ptr_max_size, compress_limit,
page_cur.rec))
Expand Down Expand Up @@ -1599,6 +1624,7 @@ ATTRIBUTE_COLD void mtr_t::index_lock_upgrade()
index_lock *lock= static_cast<index_lock*>(slot.object);
lock->u_x_upgrade(SRW_LOCK_CALL);
slot.type= MTR_MEMO_X_LOCK;
ut_d(++btr_cur_n_index_lock_upgrades);
}

/** Mark a non-leaf page "least recently used", but avoid invoking
Expand Down Expand Up @@ -1803,14 +1829,21 @@ dberr_t btr_cur_search_to_nth_level(ulint level,
if (buf_block_t *b=
mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch)))
block= b;
else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch,
block, BUF_GET, mtr, &err)))
{
btr_read_failed(err, *index);
goto func_exit;
}
else
{
/* Freshly acquiring the root page is only permitted under X-lock
or when no page latch is held (brand-new descent). Under SX with
existing page latches, the root must already be latched in the
mtr, retained from the original search_leaf() descent. */
ut_ad(height != ULINT_UNDEFINED ||
mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK) ||
(mtr->get_savepoint() == 1 && !mtr->block_at_savepoint(0)));
if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch,
block, BUF_GET, mtr, &err)))
{
btr_read_failed(err, *index);
goto func_exit;
}
btr_search_drop_page_hash_index(block, index);
btr_cur_nonleaf_make_young(&block->page);
}
Expand Down Expand Up @@ -1992,7 +2025,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
break;

if (!index->lock.have_x() &&
btr_cur_need_opposite_intention(block->page, index->is_clust(),
btr_cur_need_opposite_intention(block->page, *index,
lock_intention,
node_ptr_max_size,
compress_limit, page_cur.rec))
Expand Down Expand Up @@ -2040,7 +2073,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
if (!height && first && first_access)
buf_read_ahead_linear(page_id_t(block->page.id().space(), page));
}
else if (btr_cur_need_opposite_intention(block->page, index->is_clust(),
else if (btr_cur_need_opposite_intention(block->page, *index,
lock_intention,
node_ptr_max_size, compress_limit,
page_cur.rec))
Expand Down
7 changes: 7 additions & 0 deletions storage/innobase/handler/ha_innodb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,13 @@ static SHOW_VAR innodb_status_variables[]= {
/* InnoDB bulk operations */
{"bulk_operations", &export_vars.innodb_bulk_operations, SHOW_SIZE_T},

#ifdef UNIV_DEBUG
{"btr_index_lock_upgrades",
&btr_cur_n_index_lock_upgrades, SHOW_SIZE_T},
{"btr_need_opposite_intention_root",
&btr_cur_n_need_opposite_intention_root, SHOW_SIZE_T},
#endif /* UNIV_DEBUG */

{NullS, NullS, SHOW_LONG}
};

Expand Down
4 changes: 4 additions & 0 deletions storage/innobase/include/btr0cur.h
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,10 @@ extern ulint btr_cur_n_sea_old;
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
extern uint btr_cur_limit_optimistic_insert_debug;
/** Number of times index lock was upgraded from SX to X */
extern Atomic_counter<size_t> btr_cur_n_index_lock_upgrades;
/** Number of times btr_cur_need_opposite_intention() was called on the root */
extern Atomic_counter<size_t> btr_cur_n_need_opposite_intention_root;
#endif /* UNIV_DEBUG */

#include "btr0cur.inl"
Expand Down
Loading