Skip to content

fatal error: concurrent map iteration and write on the plain map in OpInfluence (not a sync.Map) #9951

@bufferflies

Description

@bufferflies

Bug Report

fatal error: concurrent map iteration and map write

goroutine 3102510418 [running]:
github.com/tikv/pd/pkg/schedule/operator.(*OpInfluence).Add(0xc1d10fcb78, 0xc0f7cea270?)
        /workspace/source/pd/pkg/schedule/operator/influence.go:37 +0x67
github.com/tikv/pd/pkg/schedule/operator.(*Operator).TotalInfluence(0xc05cc7bce0?, {0xc0f7cea270}, 0xc04a0a2f00?)
        /workspace/source/pd/pkg/schedule/operator/operator.go:437 +0xb9
github.com/tikv/pd/pkg/schedule/operator.AddOpInfluence(0xc05cc7bce0, {0x0?}, 0x1?)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:888 +0x3d
github.com/tikv/pd/pkg/schedule/operator.NewTotalOpInfluence({0xc1d10fcce8, 0x1, 0xc1d10fcc40?}, 0xc0093e8150)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:896 +0x4c
github.com/tikv/pd/pkg/schedule/operator.(*Controller).ack(0xc09d088870, 0xc1d10fcd38?)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:583 +0x46
github.com/tikv/pd/pkg/schedule/operator.(*Controller).removeOperatorInner(0xc09d088870, 0xc05cc7bce0)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:660 +0x115
github.com/tikv/pd/pkg/schedule/operator.(*Controller).RemoveOperator(0xc09d088870, 0xc05cc7bce0, {0x0, 0x0, 0x109ce70?})
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:633 +0x45
github.com/tikv/pd/pkg/schedule/operator.(*Controller).Dispatch(0xc09d088870, 0xc04a0a2f00, {0x32eb780, 0x9}, 0xc1d10fd1b0)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:157 +0x82b
github.com/tikv/pd/server/cluster.(*RaftCluster).HandleRegionHeartbeat(0xc0093b6c00, 0xc04a0a2f00)
        /workspace/source/pd/server/cluster/cluster_worker.go:68 +0x3e5
github.com/tikv/pd/server.(*GrpcServer).RegionHeartbeat(0xc00051b040, {0x3f5a490, 0xc1f7a14920})
        /workspace/source/pd/server/grpc_service.go:1359 +0xe17
github.com/pingcap/kvproto/pkg/pdpb._PD_RegionHeartbeat_Handler({0x32d6800?, 0xc00051b040}, {0x3f53db0, 0xc09ede9020})
        /root/go/pkg/mod/github.com/pingcap/kvproto@vX.Y.Z/pkg/pdpb/pdpb.pb.go:10012 +0xdb
github.com/grpc-ecosystem/go-grpc-prometheus.init.(*ServerMetrics).StreamServerInterceptor.func4({0x32d6800, 0xc00051b040}, {0x3f541e8, 0xc1e7586870}, 0xc09ede9008, 0x3c68538)
        /root/go/pkg/mod/github.com/grpc-ecosystem/go-grpc-prometheus@vX.Y.Z/server_metrics.go:121 +0xd2
go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc.Server.ChainStreamServer.func9.1({0x32d6800?, 0xc00051b040?}, {0x3f541e8?, 0xc1e7586870?})
        /root/go/pkg/mod/github.com/grpc-ecosystem/go-grpc-middleware@vX.Y.Z/chain.go:83 +0x45
go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc.newStreamInterceptor.func1({0x32d6800, 0xc00051b040}, {0x3f541e8, 0xc1e7586870}, 0xc09ede9008, 0xc0ddf021c0)
        /root/go/pkg/mod/go.etcd.io/etcd/server/v3@v3.Y.Z/etcdserver/api/v3rpc/interceptor.go:252 +0x46a
go.etcd.io/etcd/server/v3/etcdserver/api/v3rpc.Server.ChainStreamServer.func9({0x32d6800, 0xc00051b040}, {0x3f541e8, 0xc1e7586870}, 0xc09ede9008, 0xc1f7a148d0?)
        /root/go/pkg/mod/github.com/grpc-ecosystem/go-grpc-middleware@vX.Y.Z/chain.go:86 +0x123
google.golang.org/grpc.(*Server).processStreamingRPC(0xc005606000, {0x3f4d1b8, 0xc1f7a114a0}, {0x3f5be20, 0xc07cd0da00}, 0xc1f7a2a240, 0xc008083500, 0x5510e60, 0x0)
        /root/go/pkg/mod/google.golang.org/grpc@vX.Y.Z/server.go:1687 +0x11e7
google.golang.org/grpc.(*Server).handleStream(0xc005606000, {0x3f5be20, 0xc07cd0da00}, 0xc1f7a2a240)
        /root/go/pkg/mod/google.golang.org/grpc@vX.Y.Z/server.go:1801 +0xe36
google.golang.org/grpc.(*Server).serveStreams.func2.1()
        /root/go/pkg/mod/google.golang.org/grpc@vX.Y.Z/server.go:1027 +0x7f
created by google.golang.org/grpc.(*Server).serveStreams.func2 in goroutine 3102447602
        /root/go/pkg/mod/google.golang.org/grpc@vX.Y.Z/server.go:1038 +0x125

fatal error: concurrent map writes

goroutine 90595506 [running]:
github.com/tikv/pd/pkg/schedule/operator.OpInfluence.GetStoreInfluence(...)
        /workspace/source/pd/pkg/schedule/operator/influence.go:47
github.com/tikv/pd/pkg/schedule/operator.TransferLeader.Influence({0x54c55246, 0x54c51eca, {0xc1eb423bf0, 0x2, 0x2}}, {0xc089bc3180?}, 0xc1af013300)
        /workspace/source/pd/pkg/schedule/operator/step.go:115 +0x85
github.com/tikv/pd/pkg/schedule/operator.(*Operator).TotalInfluence(0xc06d8d9b80, {0xc1dd889560}, 0xc1af013300)
        /workspace/source/pd/pkg/schedule/operator/operator.go:434 +0xe5
github.com/tikv/pd/pkg/schedule/operator.AddOpInfluence(0xc06d8d9b80, {0x1587b71?}, 0xc007e71dc0?)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:888 +0x3d
github.com/tikv/pd/pkg/schedule/operator.NewTotalOpInfluence({0xc012957b10, 0x1, 0xc019304180?}, 0xc00ce14020)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:896 +0x4c
github.com/tikv/pd/pkg/schedule/operator.(*Controller).addOperatorInner(0xc04ac84f30, 0xc06d8d9b80)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:549 +0x79f
github.com/tikv/pd/pkg/schedule/operator.(*Controller).PromoteWaitingOperator(0xc04ac84f30)
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:413 +0x1c5
github.com/tikv/pd/pkg/schedule/operator.(*Controller).AddWaitingOperator(0xc04ac84f30, {0xc0e6693c08, 0x1, 0x0?})
        /workspace/source/pd/pkg/schedule/operator/operator_controller.go:345 +0x825
github.com/tikv/pd/pkg/schedule/schedulers.(*Controller).runScheduler(0xc00f7b6f50, 0xc017527aa0)
        /workspace/source/pd/pkg/schedule/schedulers/scheduler_controller.go:376 +0x22b
created by github.com/tikv/pd/pkg/schedule/schedulers.(*Controller).AddScheduler in goroutine 9059537
        /workspace/source/pd/pkg/schedule/schedulers/scheduler_controller.go:215 +0x1cb

What did you do?

What did you expect to see?

What did you see instead?

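Expected: no panic. Observed: pd-server crashed with the two fatal errors shown above ("concurrent map iteration and map write" and "concurrent map writes").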

What version of PD are you using (pd-server -V)?

v8.5.2

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions