Skip to content

Commit 546c094

Browse files
DaanHoogland (Daan Hoogland)
authored and committed
[routers] distinction between fatal failure and warning or unknown on healthchecks
1 parent 333f286 commit 546c094

File tree

6 files changed

+29
-19
lines changed

6 files changed

+29
-19
lines changed

api/src/main/java/com/cloud/network/RouterHealthCheckResult.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public interface RouterHealthCheckResult {
2626

2727
String getCheckType();
2828

29-
boolean getCheckResult();
29+
VirtualNetworkApplianceService.RouterHealthStatus getCheckResult();
3030

3131
Date getLastUpdateTime();
3232

api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,8 @@ void startRouterForHA(VirtualMachine vm, Map<VirtualMachineProfile.Param, Object
8787
Pair<Boolean, String> performRouterHealthChecks(long routerId);
8888

8989
<T extends VirtualRouter> void collectNetworkStatistics(T router, Nic nic);
90+
91+
enum RouterHealthStatus{
92+
SUCCESS, FAILURE, WARNING, UNKNOWN;
93+
}
9094
}

api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.util.Date;
2121

22+
import com.cloud.network.VirtualNetworkApplianceService;
2223
import org.apache.cloudstack.api.ApiConstants;
2324
import org.apache.cloudstack.api.BaseResponse;
2425

@@ -36,7 +37,7 @@ public class RouterHealthCheckResultResponse extends BaseResponse {
3637

3738
@SerializedName(ApiConstants.RESULT)
3839
@Param(description = "result of the health check")
39-
private boolean result;
40+
private VirtualNetworkApplianceService.RouterHealthStatus result;
4041

4142
@SerializedName(ApiConstants.LAST_UPDATED)
4243
@Param(description = "the date this VPC was created")
@@ -54,7 +55,7 @@ public String getCheckType() {
5455
return checkType;
5556
}
5657

57-
public boolean getResult() {
58+
public VirtualNetworkApplianceService.RouterHealthStatus getResult() {
5859
return result;
5960
}
6061

@@ -74,7 +75,7 @@ public void setCheckType(String checkType) {
7475
this.checkType = checkType;
7576
}
7677

77-
public void setResult(boolean result) {
78+
public void setResult(VirtualNetworkApplianceService.RouterHealthStatus result) {
7879
this.result = result;
7980
}
8081

engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import javax.persistence.TemporalType;
3030

3131
import com.cloud.network.RouterHealthCheckResult;
32+
import com.cloud.network.VirtualNetworkApplianceService;
3233
import com.cloud.utils.StringUtils;
3334

3435
@Entity
@@ -49,7 +50,7 @@ public class RouterHealthCheckResultVO implements RouterHealthCheckResult {
4950
private String checkType;
5051

5152
@Column(name = "check_result")
52-
private boolean checkResult;
53+
private VirtualNetworkApplianceService.RouterHealthStatus checkResult;
5354

5455
@Temporal(TemporalType.TIMESTAMP)
5556
@Column(name = "last_update", updatable = true, nullable = true)
@@ -87,7 +88,7 @@ public String getCheckType() {
8788
}
8889

8990
@Override
90-
public boolean getCheckResult() {
91+
public VirtualNetworkApplianceService.RouterHealthStatus getCheckResult() {
9192
return checkResult;
9293
}
9394

@@ -105,7 +106,7 @@ public byte[] getCheckDetails() {
105106
return checkDetails;
106107
}
107108

108-
public void setCheckResult(boolean checkResult) {
109+
public void setCheckResult(VirtualNetworkApplianceService.RouterHealthStatus checkResult) {
109110
this.checkResult = checkResult;
110111
}
111112

engine/schema/src/main/resources/META-INF/db/schema-41910to41920.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,7 @@ CALL `cloud`.`IDEMPOTENT_UPDATE_API_PERMISSION`('Read-Only Admin - Default', 'va
4343

4444
CALL `cloud`.`IDEMPOTENT_UPDATE_API_PERMISSION`('Support Admin - Default', 'setupUserTwoFactorAuthentication', 'ALLOW');
4545
CALL `cloud`.`IDEMPOTENT_UPDATE_API_PERMISSION`('Support Admin - Default', 'validateUserTwoFactorAuthenticationCode', 'ALLOW');
46+
47+
-- add status warn and unknown to router health checks
48+
49+
-- check_result is widened to VARCHAR(16) so it can hold a health status value rather than a boolean.
-- NOTE(review): rows written before this upgrade presumably still hold boolean 0/1 values; confirm whether they need migrating.
CALL `cloud`.`IDEMPOTENT_CHANGE_COLUMN`('cloud.router_health_check', 'check_result', 'check_result', 'VARCHAR(16) NOT NULL COMMENT "check executions for success or (fatal) failure"');

server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1183,20 +1183,20 @@ protected void runInContext() {
11831183
private List<String> getFailingChecks(DomainRouterVO router, GetRouterMonitorResultsAnswer answer) {
11841184

11851185
if (answer == null) {
1186-
logger.warn("Unable to fetch monitor results for router " + router);
1187-
resetRouterHealthChecksAndConnectivity(router.getId(), false, false, "Communication failed");
1186+
logger.warn("Unable to fetch monitor results for router {}", router);
1187+
resetRouterHealthChecksAndConnectivity(router.getId(), RouterHealthStatus.UNKNOWN, RouterHealthStatus.UNKNOWN, "Communication failed");
11881188
return Arrays.asList(CONNECTIVITY_TEST);
11891189
} else if (!answer.getResult()) {
11901190
logger.warn("Failed to fetch monitor results from router " + router + " with details: " + answer.getDetails());
11911191
if (StringUtils.isNotBlank(answer.getDetails()) && answer.getDetails().equalsIgnoreCase(READONLY_FILESYSTEM_ERROR)) {
1192-
resetRouterHealthChecksAndConnectivity(router.getId(), true, false, "Failed to write: " + answer.getDetails());
1192+
resetRouterHealthChecksAndConnectivity(router.getId(), RouterHealthStatus.SUCCESS, RouterHealthStatus.FAILURE, "Failed to write: " + answer.getDetails());
11931193
return Arrays.asList(FILESYSTEM_WRITABLE_TEST);
11941194
} else {
1195-
resetRouterHealthChecksAndConnectivity(router.getId(), false, false, "Failed to fetch results with details: " + answer.getDetails());
1195+
resetRouterHealthChecksAndConnectivity(router.getId(), RouterHealthStatus.FAILURE, RouterHealthStatus.UNKNOWN, "Failed to fetch results with details: " + answer.getDetails());
11961196
return Arrays.asList(CONNECTIVITY_TEST);
11971197
}
11981198
} else {
1199-
resetRouterHealthChecksAndConnectivity(router.getId(), true, true, "Successfully fetched data");
1199+
resetRouterHealthChecksAndConnectivity(router.getId(), RouterHealthStatus.SUCCESS, RouterHealthStatus.SUCCESS, "Successfully fetched data");
12001200
updateDbHealthChecksFromRouterResponse(router, answer.getMonitoringResults());
12011201
return answer.getFailingChecks();
12021202
}
@@ -1295,7 +1295,7 @@ private boolean restartGuestNetworkInDomainRouter(DomainRouterJoinVO router, Use
12951295

12961296
/**
12971297
* Attempts recreation of router by restarting with cleanup a VPC if any or a guest network associated in case no VPC.
1298-
* @param routerId - the id of the router to be recreated.
1298+
* @param router - the router to be recreated.
12991299
* @return true if a restart is successfully attempted, else false.
13001300
*/
13011301
private boolean recreateRouter(DomainRouterVO router) {
@@ -1335,13 +1335,13 @@ private Map<String, Map<String, RouterHealthCheckResultVO>> getHealthChecksFromD
13351335
return healthCheckResults;
13361336
}
13371337

1338-
private void resetRouterHealthChecksAndConnectivity(final long routerId, boolean connected, boolean writable, String message) {
1338+
private void resetRouterHealthChecksAndConnectivity(final long routerId, VirtualNetworkApplianceService.RouterHealthStatus connected, VirtualNetworkApplianceService.RouterHealthStatus writable, String message) {
13391339
routerHealthCheckResultDao.expungeHealthChecks(routerId);
1340-
updateRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic", connected, connected ? "Successfully connected to router" : message);
1341-
updateRouterHealthCheckResult(routerId, FILESYSTEM_WRITABLE_TEST, "basic", writable, writable ? "Successfully written to file system" : message);
1340+
updateRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic", connected, connected.equals(RouterHealthStatus.SUCCESS) ? "Successfully connected to router" : message);
1341+
updateRouterHealthCheckResult(routerId, FILESYSTEM_WRITABLE_TEST, "basic", writable, writable.equals(RouterHealthStatus.SUCCESS) ? "Successfully written to file system" : message);
13421342
}
13431343

1344-
private void updateRouterHealthCheckResult(final long routerId, String checkName, String checkType, boolean checkResult, String checkMessage) {
1344+
private void updateRouterHealthCheckResult(final long routerId, String checkName, String checkType, VirtualNetworkApplianceService.RouterHealthStatus checkResult, String checkMessage) {
13451345
boolean newHealthCheckEntry = false;
13461346
RouterHealthCheckResultVO connectivityVO = routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, checkName, checkType);
13471347
if (connectivityVO == null) {
@@ -1365,7 +1365,7 @@ private void updateRouterHealthCheckResult(final long routerId, String checkName
13651365
private RouterHealthCheckResultVO parseHealthCheckVOFromJson(final long routerId,
13661366
final String checkName, final String checkType, final Map<String, String> checkData,
13671367
final Map<String, Map<String, RouterHealthCheckResultVO>> checksInDb) {
1368-
boolean success = Boolean.parseBoolean(checkData.get("success"));
1368+
VirtualNetworkApplianceService.RouterHealthStatus success = RouterHealthStatus.valueOf(checkData.get("success"));
13691369
Date lastUpdate = new Date(Long.parseLong(checkData.get("lastUpdate")));
13701370
double lastRunDuration = Double.parseDouble(checkData.get("lastRunDuration"));
13711371
String message = checkData.get("message");
@@ -1572,7 +1572,7 @@ public Pair<Boolean, String> performRouterHealthChecks(long routerId) {
15721572
List<String> failingChecks = getFailingChecks(router, answer);
15731573
handleFailingChecks(router, failingChecks);
15741574

1575-
return new Pair<Boolean, String>(success, resultDetails);
1575+
return new Pair<>(success, resultDetails);
15761576
}
15771577

15781578
protected class UpdateRouterHealthChecksConfigTask extends ManagedContextRunnable {

0 commit comments

Comments
 (0)