Skip to content

Commit 52a4937

Browse files
authored
bugfix: agent cannot reconnect after server is down (#79)
1 parent 8b8d405 commit 52a4937

File tree

2 files changed

+27
-13
lines changed

2 files changed

+27
-13
lines changed

skywalking/agent/__init__.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,10 +40,9 @@ def __heartbeat():
4040
def __report():
4141
while not __finished.is_set():
4242
if connected():
43-
__protocol.report(__queue)
44-
break
45-
else:
46-
__finished.wait(1)
43+
__protocol.report(__queue) # is blocking actually
44+
45+
__finished.wait(1)
4746

4847

4948
__heartbeat_thread = Thread(name='HeartbeatThread', target=__heartbeat, daemon=True)

skywalking/agent/protocol/grpc.py

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#
1717

1818
import logging
19+
import traceback
1920
from queue import Queue
2021

2122
import grpc
@@ -40,22 +41,33 @@ def __init__(self):
4041
self.channel, header_adder_interceptor('authentication', config.authentication)
4142
)
4243

43-
def cb(state):
44-
logger.debug('grpc channel connectivity changed, [%s -> %s]', self.state, state)
45-
self.state = state
46-
if self.connected():
47-
self.service_management.send_instance_props()
48-
49-
self.channel.subscribe(cb, try_to_connect=True)
44+
self.channel.subscribe(self._cb, try_to_connect=True)
5045
self.service_management = GrpcServiceManagementClient(self.channel)
5146
self.traces_reporter = GrpcTraceSegmentReportService(self.channel)
5247

48+
def _cb(self, state):
49+
logger.debug('grpc channel connectivity changed, [%s -> %s]', self.state, state)
50+
self.state = state
51+
if self.connected():
52+
try:
53+
self.service_management.send_instance_props()
54+
except grpc.RpcError:
55+
self.on_error()
56+
5357
def heartbeat(self):
54-
self.service_management.send_heart_beat()
58+
try:
59+
self.service_management.send_heart_beat()
60+
except grpc.RpcError:
61+
self.on_error()
5562

5663
def connected(self):
5764
return self.state == grpc.ChannelConnectivity.READY
5865

66+
def on_error(self):
67+
traceback.print_exc()
68+
self.channel.unsubscribe(self._cb)
69+
self.channel.subscribe(self._cb, try_to_connect=True)
70+
5971
def report(self, queue: Queue):
6072
def generator():
6173
while True:
@@ -104,4 +116,7 @@ def generator():
104116

105117
queue.task_done()
106118

107-
self.traces_reporter.report(generator())
119+
try:
120+
self.traces_reporter.report(generator())
121+
except grpc.RpcError:
122+
self.on_error()

0 commit comments

Comments
 (0)