diff --git a/api/src/main/java/com/cloud/event/EventTypes.java b/api/src/main/java/com/cloud/event/EventTypes.java index a30518aaf176..d9a70b98e89e 100644 --- a/api/src/main/java/com/cloud/event/EventTypes.java +++ b/api/src/main/java/com/cloud/event/EventTypes.java @@ -19,6 +19,13 @@ import java.util.HashMap; import java.util.Map; +import org.apache.cloudstack.acl.Role; +import org.apache.cloudstack.acl.RolePermission; +import org.apache.cloudstack.annotation.Annotation; +import org.apache.cloudstack.config.Configuration; +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.usage.Usage; + import com.cloud.dc.DataCenter; import com.cloud.dc.Pod; import com.cloud.dc.StorageNetworkIpRange; @@ -69,12 +76,6 @@ import com.cloud.vm.Nic; import com.cloud.vm.NicSecondaryIp; import com.cloud.vm.VirtualMachine; -import org.apache.cloudstack.acl.Role; -import org.apache.cloudstack.acl.RolePermission; -import org.apache.cloudstack.annotation.Annotation; -import org.apache.cloudstack.config.Configuration; -import org.apache.cloudstack.ha.HAConfig; -import org.apache.cloudstack.usage.Usage; public class EventTypes { @@ -106,6 +107,7 @@ public class EventTypes { public static final String EVENT_ROUTER_HA = "ROUTER.HA"; public static final String EVENT_ROUTER_UPGRADE = "ROUTER.UPGRADE"; public static final String EVENT_ROUTER_DIAGNOSTICS = "ROUTER.DIAGNOSTICS"; + public static final String EVENT_ROUTER_HEALTH_CHECKS = "ROUTER.HEALTH.CHECKS"; // Console proxy public static final String EVENT_PROXY_CREATE = "PROXY.CREATE"; @@ -603,6 +605,7 @@ public class EventTypes { entityEventDetails.put(EVENT_ROUTER_HA, VirtualRouter.class); entityEventDetails.put(EVENT_ROUTER_UPGRADE, VirtualRouter.class); entityEventDetails.put(EVENT_ROUTER_DIAGNOSTICS, VirtualRouter.class); + entityEventDetails.put(EVENT_ROUTER_HEALTH_CHECKS, VirtualRouter.class); entityEventDetails.put(EVENT_PROXY_CREATE, VirtualMachine.class); entityEventDetails.put(EVENT_PROXY_DESTROY, 
VirtualMachine.class); diff --git a/api/src/main/java/com/cloud/network/NetworkService.java b/api/src/main/java/com/cloud/network/NetworkService.java index 04f240b5e13e..f90c3d423b2b 100644 --- a/api/src/main/java/com/cloud/network/NetworkService.java +++ b/api/src/main/java/com/cloud/network/NetworkService.java @@ -72,7 +72,9 @@ IpAddress allocatePortableIP(Account ipOwner, int regionId, Long zoneId, Long ne boolean deleteNetwork(long networkId, boolean forced); - boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + + boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; int getActiveNicsInNetwork(long networkId); diff --git a/api/src/main/java/com/cloud/network/RouterHealthCheckResult.java b/api/src/main/java/com/cloud/network/RouterHealthCheckResult.java new file mode 100644 index 000000000000..eb65ae9088ec --- /dev/null +++ b/api/src/main/java/com/cloud/network/RouterHealthCheckResult.java @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.network; + +import java.util.Date; + +public interface RouterHealthCheckResult { + long getRouterId(); + + String getCheckName(); + + String getCheckType(); + + boolean getCheckResult(); + + Date getLastUpdateTime(); + + String getParsedCheckDetails(); +} diff --git a/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java b/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java index 815ae4d6ae31..98fb8be7c7a9 100644 --- a/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java +++ b/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java @@ -31,8 +31,7 @@ public interface VirtualNetworkApplianceService { /** * Starts domain router * - * @param cmd - * the command specifying router's id + * @param cmd the command specifying router's id * @return DomainRouter object */ VirtualRouter startRouter(long routerId, boolean reprogramNetwork) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; @@ -51,10 +50,8 @@ public interface VirtualNetworkApplianceService { /** * Stops domain router * - * @param id - * of the router - * @param forced - * just do it. caller knows best. + * @param id of the router + * @param forced just do it. caller knows best. 
* @return router if successful, null otherwise * @throws ResourceUnavailableException * @throws ConcurrentOperationException @@ -68,4 +65,13 @@ public interface VirtualNetworkApplianceService { VirtualRouter findRouter(long routerId); List upgradeRouterTemplate(UpgradeRouterTemplateCmd cmd); + + /** + * Updates router with latest health check data, runs health checks and persists health checks on virtual router if feasible. + * Throws relevant exception if feature is disabled or failures occur. + * + * @param routerId id of the router + * @return true if the health checks were performed (TODO: confirm exact semantics against the implementation) + */ + boolean performRouterHealthChecks(long routerId); } diff --git a/api/src/main/java/com/cloud/network/vpc/VpcService.java b/api/src/main/java/com/cloud/network/vpc/VpcService.java index 241e27b1552e..5fc339e64cb8 100644 --- a/api/src/main/java/com/cloud/network/vpc/VpcService.java +++ b/api/src/main/java/com/cloud/network/vpc/VpcService.java @@ -21,6 +21,7 @@ import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd; import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd; +import org.apache.cloudstack.api.command.user.vpc.RestartVPCCmd; import com.cloud.exception.ConcurrentOperationException; import com.cloud.exception.InsufficientAddressCapacityException; @@ -29,6 +30,7 @@ import com.cloud.exception.ResourceAllocationException; import com.cloud.exception.ResourceUnavailableException; import com.cloud.network.IpAddress; +import com.cloud.user.User; import com.cloud.utils.Pair; public interface VpcService { @@ -132,7 +134,9 @@ public Pair, Integer> listVpcs(Long id, String vpcName, Stri * @return * @throws InsufficientCapacityException */ - boolean restartVpc(long id, boolean cleanUp, boolean makeredundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + boolean restartVpc(RestartVPCCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + + boolean restartVpc(Long networkId, boolean 
cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; /** * Returns a Private gateway found in the VPC by id diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index 44c53f690fb8..ee76b9aa1672 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -724,6 +724,7 @@ public class ApiConstants { public static final String VIRTUAL_SIZE = "virtualsize"; public static final String NETSCALER_CONTROLCENTER_ID = "netscalercontrolcenterid"; public static final String NETSCALER_SERVICEPACKAGE_ID = "netscalerservicepackageid"; + public static final String FETCH_ROUTER_HEALTH_CHECK_RESULTS = "fetchhealthcheckresults"; public static final String ZONE_ID_LIST = "zoneids"; public static final String DESTINATION_ZONE_ID_LIST = "destzoneids"; @@ -745,6 +746,13 @@ public class ApiConstants { public static final String FILES = "files"; public static final String VOLUME_IDS = "volumeids"; + public static final String ROUTER_ID = "routerid"; + public static final String ROUTER_HEALTH_CHECKS = "healthchecks"; + public static final String ROUTER_CHECK_NAME = "checkname"; + public static final String ROUTER_CHECK_TYPE = "checktype"; + public static final String LAST_UPDATED = "lastupdated"; + public static final String PERFORM_FRESH_CHECKS = "performfreshchecks"; + public enum HostDetails { all, capacity, events, stats, min; } diff --git a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java index 740ee468702f..510562bf54ae 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java +++ b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java @@ -22,6 +22,7 @@ import java.util.Map; import java.util.Set; +import 
org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.affinity.AffinityGroup; import org.apache.cloudstack.affinity.AffinityGroupResponse; @@ -146,6 +147,7 @@ import com.cloud.network.PhysicalNetworkServiceProvider; import com.cloud.network.PhysicalNetworkTrafficType; import com.cloud.network.RemoteAccessVpn; +import com.cloud.network.RouterHealthCheckResult; import com.cloud.network.Site2SiteCustomerGateway; import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.Site2SiteVpnGateway; @@ -466,4 +468,6 @@ List createTemplateResponses(ResponseView view, VirtualMachine SSHKeyPairResponse createSSHKeyPairResponse(SSHKeyPair sshkeyPair, boolean privatekey); ManagementServerResponse createManagementResponse(ManagementServerHost mgmt); + + List createHealthCheckResponse(VirtualMachine router, List healthCheckResults); } diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java index ba2054c3c24b..fd415858f564 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java @@ -16,6 +16,7 @@ // under the License. 
package org.apache.cloudstack.api.command.admin.internallb; +import org.apache.commons.lang.BooleanUtils; import org.apache.log4j.Logger; import org.apache.cloudstack.api.APICommand; @@ -73,6 +74,11 @@ public class ListInternalLBVMsCmd extends BaseListProjectAndAccountResourcesCmd @Parameter(name = ApiConstants.FOR_VPC, type = CommandType.BOOLEAN, description = "if true is passed for this parameter, list only VPC Internal LB VMs") private Boolean forVpc; + + @Parameter(name = ApiConstants.FETCH_ROUTER_HEALTH_CHECK_RESULTS, type = CommandType.BOOLEAN, since = "4.14", + description = "if true is passed for this parameter, also fetch last executed health check results for the VM. Default is false") + private Boolean fetchHealthCheckResults; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -117,6 +123,10 @@ public String getRole() { return Role.INTERNAL_LB_VM.toString(); } + public boolean shouldFetchHealthCheckResults() { + return BooleanUtils.isTrue(fetchHealthCheckResults); + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/router/GetRouterHealthCheckResultsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/GetRouterHealthCheckResultsCmd.java new file mode 100644 index 000000000000..5efc6de9e948 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/GetRouterHealthCheckResultsCmd.java @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.router; + +import java.util.List; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.DomainRouterResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultsListResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.commons.lang.BooleanUtils; +import org.apache.log4j.Logger; + +import com.cloud.exception.InvalidParameterValueException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.network.router.VirtualRouter; +import com.cloud.user.Account; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.VirtualMachine; + +@APICommand(name = GetRouterHealthCheckResultsCmd.APINAME, + responseObject = RouterHealthCheckResultsListResponse.class, + description = "Gets the health check results of a router.", + entityType = {VirtualMachine.class}, + requestHasSensitiveInfo = false, + responseHasSensitiveInfo = false, + since = "4.14.0") +public class GetRouterHealthCheckResultsCmd extends BaseCmd { + public static final Logger s_logger 
= Logger.getLogger(GetRouterHealthCheckResultsCmd.class.getName()); + public static final String APINAME = "getRouterHealthCheckResults"; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ROUTER_ID, type = CommandType.UUID, entityType = DomainRouterResponse.class, + required = true, description = "the ID of the router") + private Long routerId; + + @Parameter(name = ApiConstants.PERFORM_FRESH_CHECKS, type = CommandType.BOOLEAN, description = "if true is passed for this parameter, " + + "health checks are performed on the fly. Else last performed checks data is fetched") + private Boolean performFreshChecks; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getRouterId() { + return routerId; + } + + public boolean shouldPerformFreshChecks() { + return BooleanUtils.isTrue(performFreshChecks); + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + VirtualRouter router = _entityMgr.findById(VirtualRouter.class, getRouterId()); + if (router != null) { + return router.getAccountId(); + } + + return Account.ACCOUNT_ID_SYSTEM; + } + + @Override + public void execute() throws ResourceUnavailableException, InvalidParameterValueException, ServerApiException { + CallContext.current().setEventDetails("Router Id: " + this._uuidMgr.getUuid(VirtualMachine.class, getRouterId())); + VirtualRouter router = _routerService.findRouter(getRouterId()); + if (router == null || router.getRole() != 
VirtualRouter.Role.VIRTUAL_ROUTER) { + throw new InvalidParameterValueException("Can't find router by routerId"); + } + + try { + List healthChecks = _queryService.listRouterHealthChecks(this); + RouterHealthCheckResultsListResponse routerResponse = new RouterHealthCheckResultsListResponse(); + routerResponse.setRouterId(router.getUuid()); + routerResponse.setHealthChecks(healthChecks); + routerResponse.setObjectName("routerhealthchecks"); + routerResponse.setResponseName(getCommandName()); + setResponseObject(routerResponse); + } catch (CloudRuntimeException ex){ + s_logger.error("Failed to get health check results for router " + router.getUuid(), ex); + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to execute command due to exception: " + ex.getLocalizedMessage()); + } + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java index 121fc5bc14d2..4fabcf5df768 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java @@ -16,6 +16,7 @@ // under the License. package org.apache.cloudstack.api.command.admin.router; +import org.apache.commons.lang.BooleanUtils; import org.apache.log4j.Logger; import org.apache.cloudstack.api.APICommand; @@ -80,6 +81,10 @@ public class ListRoutersCmd extends BaseListProjectAndAccountResourcesCmd { @Parameter(name = ApiConstants.VERSION, type = CommandType.STRING, description = "list virtual router elements by version") private String version; + @Parameter(name = ApiConstants.FETCH_ROUTER_HEALTH_CHECK_RESULTS, type = CommandType.BOOLEAN, since = "4.14", + description = "if true is passed for this parameter, also fetch last executed health check results for the router. 
Default is false") + private Boolean fetchHealthCheckResults; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -132,6 +137,11 @@ public String getRole() { return Role.VIRTUAL_ROUTER.toString(); } + public boolean shouldFetchHealthCheckResults() { + return BooleanUtils.isTrue(fetchHealthCheckResults); + } + + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java index 645ae5aff8e3..d422966388cd 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java @@ -96,7 +96,7 @@ public static String getResultObjectName() { @Override public void execute() throws ResourceUnavailableException, ResourceAllocationException, ConcurrentOperationException, InsufficientCapacityException { - boolean result = _networkService.restartNetwork(this, getCleanup(), getMakeRedundant()); + boolean result = _networkService.restartNetwork(this); if (result) { SuccessResponse response = new SuccessResponse(getCommandName()); setResponseObject(response); diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java index edfd93e785ec..8ed2ab26a225 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java @@ -91,7 +91,7 @@ public long getEntityOwnerId() { @Override public void execute() { try { - final boolean result = 
_vpcService.restartVpc(getId(), getCleanup(), getMakeredundant()); + final boolean result = _vpcService.restartVpc(this); if (result) { final SuccessResponse response = new SuccessResponse(getCommandName()); setResponseObject(response); diff --git a/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java index 131e3e1de7ed..97e3b0160d91 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java @@ -18,10 +18,9 @@ import java.util.Date; import java.util.LinkedHashSet; +import java.util.List; import java.util.Set; -import com.google.gson.annotations.SerializedName; - import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; import org.apache.cloudstack.api.EntityReference; @@ -29,6 +28,7 @@ import com.cloud.serializer.Param; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; +import com.google.gson.annotations.SerializedName; @EntityReference(value = VirtualMachine.class) @SuppressWarnings("unused") @@ -217,6 +217,14 @@ public class DomainRouterResponse extends BaseResponse implements ControlledView @Param(description = "true if the router template requires upgrader") private boolean requiresUpgrade; + @SerializedName("healthchecksfailed") + @Param(description = "true if any health checks had failed", since = "4.14") + private boolean healthChecksFailed; + + @SerializedName("healthcheckresults") + @Param(description = "Last executed health check result for the router", responseObject = RouterHealthCheckResultResponse.class, since = "4.14") + private List healthCheckResults; + public DomainRouterResponse() { nics = new LinkedHashSet(); } @@ -278,6 +286,14 @@ public String getHypervisor() { return hypervisor; } + public List getHealthCheckResults() { + return healthCheckResults; + } + + public boolean 
getHealthChecksFailed() { + return healthChecksFailed; + } + public void setHypervisor(String hypervisor) { this.hypervisor = hypervisor; } @@ -446,4 +462,12 @@ public boolean requiresUpgrade() { public void setRequiresUpgrade(boolean requiresUpgrade) { this.requiresUpgrade = requiresUpgrade; } + + public void setHealthChecksFailed(boolean healthChecksFailed) { + this.healthChecksFailed = healthChecksFailed; + } + + public void setHealthCheckResults(List healthCheckResults) { + this.healthCheckResults = healthCheckResults; + } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java new file mode 100644 index 000000000000..f98cf0acd5dd --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.response; + +import java.util.Date; + +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; + +public class RouterHealthCheckResultResponse extends BaseResponse { + @SerializedName(ApiConstants.ROUTER_CHECK_NAME) + @Param(description = "the name of the health check on the router") + private String checkName; + + @SerializedName(ApiConstants.ROUTER_CHECK_TYPE) + @Param(description = "the type of the health check - basic or advanced") + private String checkType; + + @SerializedName(ApiConstants.RESULT) + @Param(description = "result of the health check") + private boolean result; + + @SerializedName(ApiConstants.LAST_UPDATED) + @Param(description = "the date this health check result was last updated") + private Date lastUpdated; + + @SerializedName(ApiConstants.DETAILS) + @Param(description = "detailed response generated on running health check") + private String details; + + public String getCheckName() { + return checkName; + } + + public String getCheckType() { + return checkType; + } + + public boolean getResult() { + return result; + } + + public Date getLastUpdated() { + return lastUpdated; + } + + public String getDetails() { + return details; + } + + public void setCheckName(String checkName) { + this.checkName = checkName; + } + + public void setCheckType(String checkType) { + this.checkType = checkType; + } + + public void setResult(boolean result) { + this.result = result; + } + + public void setLastUpdated(Date lastUpdated) { + this.lastUpdated = lastUpdated; + } + + public void setDetails(String details) { + this.details = details; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultsListResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultsListResponse.java new file mode 100644 index 000000000000..e56f70d2c59c --- /dev/null 
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultsListResponse.java @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.response; + +import java.util.List; + +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; + +public class RouterHealthCheckResultsListResponse extends BaseResponse { + @SerializedName(ApiConstants.ROUTER_ID) + @Param(description = "the id of the router") + private String routerId; + + @SerializedName(ApiConstants.ROUTER_HEALTH_CHECKS) + @Param(description = "the health check results of the router") + private List healthChecks; + + public String getRouterId() { + return routerId; + } + + public List getHealthChecks() { + return healthChecks; + } + + public void setRouterId(String routerId) { + this.routerId = routerId; + } + + public void setHealthChecks(List healthChecks) { + this.healthChecks = healthChecks; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/query/QueryService.java b/api/src/main/java/org/apache/cloudstack/query/QueryService.java index 68dc31f6708d..49e0f5ebdd49 100644 --- 
a/api/src/main/java/org/apache/cloudstack/query/QueryService.java +++ b/api/src/main/java/org/apache/cloudstack/query/QueryService.java @@ -24,6 +24,7 @@ import org.apache.cloudstack.api.command.admin.host.ListHostsCmd; import org.apache.cloudstack.api.command.admin.internallb.ListInternalLBVMsCmd; import org.apache.cloudstack.api.command.admin.management.ListMgmtsCmd; +import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd; import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd; import org.apache.cloudstack.api.command.admin.storage.ListImageStoresCmd; import org.apache.cloudstack.api.command.admin.storage.ListSecondaryStagingStoresCmd; @@ -68,6 +69,7 @@ import org.apache.cloudstack.api.response.ProjectResponse; import org.apache.cloudstack.api.response.ResourceDetailResponse; import org.apache.cloudstack.api.response.ResourceTagResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.SecurityGroupResponse; import org.apache.cloudstack.api.response.ServiceOfferingResponse; import org.apache.cloudstack.api.response.StoragePoolResponse; @@ -170,4 +172,6 @@ public interface QueryService { ListResponse listManagementServers(ListMgmtsCmd cmd); ListResponse listTemplateOVFProperties(ListTemplateOVFProperties cmd); + + List listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd); } diff --git a/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsAnswer.java b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsAnswer.java new file mode 100644 index 000000000000..4db59dfac335 --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsAnswer.java @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.agent.api.routing; + +import java.util.List; + +import com.cloud.agent.api.Answer; +import com.cloud.agent.api.Command; + +public class GetRouterMonitorResultsAnswer extends Answer { + private List failingChecks; + private String monitoringResults; + + protected GetRouterMonitorResultsAnswer() { + super(); + } + + public GetRouterMonitorResultsAnswer(Command cmd, boolean success, List failingChecks, String monitoringResults) { + super(cmd, success, monitoringResults); + this.failingChecks = failingChecks; + this.monitoringResults = monitoringResults; + } + + public List getFailingChecks() { + return failingChecks; + } + + public String getMonitoringResults() { + return monitoringResults; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsCommand.java b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsCommand.java new file mode 100644 index 000000000000..779a0f45a57f --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsCommand.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.agent.api.routing; +
+/**
+ * Query command asking a virtual router for its monitoring / health check results.
+ */
+public class GetRouterMonitorResultsCommand extends NetworkElementCommand {
+    /** Whether fresh checks are requested; forwarded to the router script as "true" or "false". */
+    private boolean performFreshChecks;
+
+    /** No-argument constructor; {@code performFreshChecks} stays false. */
+    protected GetRouterMonitorResultsCommand() {
+    }
+
+    /** @param performFreshChecks whether fresh checks are requested from the router */
+    public GetRouterMonitorResultsCommand(boolean performFreshChecks) {
+        this.performFreshChecks = performFreshChecks;
+    }
+
+    /** Always true; VirtualRoutingResource handles this command on its query path. */
+    @Override
+    public boolean isQuery() {
+        return true;
+    }
+
+    /** @return whether fresh checks were requested */
+    public boolean shouldPerformFreshChecks() {
+        return performFreshChecks;
+    }
+}
diff --git a/core/src/main/java/com/cloud/agent/api/routing/LoadRouterHealthChecksConfigCommand.java b/core/src/main/java/com/cloud/agent/api/routing/LoadRouterHealthChecksConfigCommand.java new file mode 100644 index 000000000000..b705a469e81b --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/routing/LoadRouterHealthChecksConfigCommand.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.agent.api.routing; + +import java.util.HashMap; +import java.util.Map; +
+/**
+ * Loads new and updates old configuration details on VR for health checks.
+ */
+public class LoadRouterHealthChecksConfigCommand extends NetworkElementCommand {
+
+    /** Configuration details keyed by name; initialised to an empty map by the constructor. */
+    private Map<String, String> details;
+
+    /** Starts with an empty details map. */
+    protected LoadRouterHealthChecksConfigCommand() {
+        details = new HashMap<>();
+    }
+
+    /**
+     * Adds a configuration detail, replacing any value already stored under the same key.
+     *
+     * @param key detail name
+     * @param value detail value
+     */
+    public void addDetail(String key, String value) {
+        this.details.put(key, value);
+    }
+
+    /** @return the details map itself (not a copy) */
+    public Map<String, String> getDetails() {
+        return details;
+    }
+}
diff --git a/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java b/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java index ae482ac71ec7..de3843e2b837 100644 --- a/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java +++ b/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java @@ -38,7 +38,6 @@ public abstract class NetworkElementCommand extends Command { public static final String GUEST_BRIDGE = "guest.bridge"; public static final String VPC_PRIVATE_GATEWAY = "vpc.gateway.private"; public static final String FIREWALL_EGRESS_DEFAULT = "firewall.egress.default"; - public static final String ROUTER_MONITORING_ENABLE = "router.monitor.enable"; public static final String NETWORK_PUB_LAST_IP = "network.public.last.ip"; private String routerAccessIp; diff --git a/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java b/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java index a5377039dd63..86fc14c88b79 100644 ---
a/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java +++ b/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java @@ -20,6 +20,9 @@ package com.cloud.agent.api.routing; import java.util.List; +import java.util.Map; + +import org.apache.commons.collections.CollectionUtils; import com.cloud.agent.api.to.MonitorServiceTO; @@ -29,13 +32,24 @@ * how to access the components inside the command. */ public class SetMonitorServiceCommand extends NetworkElementCommand { - MonitorServiceTO[] services; + public static final String ROUTER_MONITORING_ENABLED = "router.monitor.enabled"; + public static final String ROUTER_HEALTH_CHECKS_ENABLED = "router.health.checks.enabled"; + public static final String ROUTER_HEALTH_CHECKS_BASIC_INTERVAL = "router.health.checks.basic.interval"; + public static final String ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL = "router.health.checks.advanced.interval"; + public static final String ROUTER_HEALTH_CHECKS_EXCLUDED = "router.health.checks.excluded"; + + private MonitorServiceTO[] services; + private Map healthChecksConfig; /* handed to MonitorService.setHealthChecksConfig by SetMonitorServiceConfigItem */ + private boolean reconfigureAfterUpdate; /* when true, SetMonitorServiceConfigItem appends a configure.py run on monitor_service.json */ + private boolean deleteFromProcessedCache; /* copied onto the MonitorService config; makes update_config.py receive an extra "false" argument */ protected SetMonitorServiceCommand() { } public SetMonitorServiceCommand(List services) { - this.services = services.toArray(new MonitorServiceTO[services.size()]); + if (CollectionUtils.isNotEmpty(services)) { /* a null or empty list leaves services null */ + this.services = services.toArray(new MonitorServiceTO[services.size()]); + } } public MonitorServiceTO[] getRules() { @@ -43,7 +57,9 @@ public MonitorServiceTO[] getRules() { } public String getConfiguration() { - + if (services == null) { /* services is only assigned when a non-empty list was supplied */ + return null; + } StringBuilder sb = new StringBuilder(); for (MonitorServiceTO service : services) { sb.append("[").append(service.getService()).append("]").append(":"); @@ -55,4 +71,28 @@ public String getConfiguration() { return sb.toString(); } + + public Map getHealthChecksConfig() { + return healthChecksConfig; + } + + public void
setHealthChecksConfig(Map healthChecksConfig) { + this.healthChecksConfig = healthChecksConfig; + } + + public boolean shouldReconfigureAfterUpdate() { + return reconfigureAfterUpdate; + } + + public void setReconfigureAfterUpdate(boolean reconfigureAfterUpdate) { + this.reconfigureAfterUpdate = reconfigureAfterUpdate; + } + + public boolean shouldDeleteFromProcessedCache() { + return deleteFromProcessedCache; + } + + public void setDeleteFromProcessedCache(boolean deleteFromProcessedCache) { + this.deleteFromProcessedCache = deleteFromProcessedCache; + } } diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java index b9d6487de561..f8cf6d451b84 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java @@ -47,6 +47,8 @@ public class VRScripts { // New scripts for use with chef public static final String UPDATE_CONFIG = "update_config.py"; + public static final String CONFIGURE = "configure.py"; /* queued by SetMonitorServiceConfigItem when shouldReconfigureAfterUpdate() is true */ + // Script still in use - mostly by HyperV public static final String S2SVPN_CHECK = "checkbatchs2svpn.sh"; @@ -66,6 +68,7 @@ public class VRScripts { public static final String VPC_STATIC_ROUTE = "vpc_staticroute.sh"; public static final String VPN_L2TP = "vpn_l2tp.sh"; public static final String UPDATE_HOST_PASSWD = "update_host_passwd.sh"; + public static final String ROUTER_MONITOR_RESULTS = "getRouterMonitorResults.sh"; /* run by VirtualRoutingResource for GetRouterMonitorResultsCommand */ public static final String VR_CFG = "vr_cfg.sh"; diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java index 191a62263f36..f07b1b26bdeb 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java +++
b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java @@ -22,13 +22,6 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.nio.channels.SocketChannel; - -import org.apache.cloudstack.diagnostics.DeleteFileInVrCommand; -import org.apache.cloudstack.diagnostics.DiagnosticsAnswer; -import org.apache.cloudstack.diagnostics.DiagnosticsCommand; -import org.apache.cloudstack.diagnostics.PrepareFilesAnswer; -import org.apache.cloudstack.diagnostics.PrepareFilesCommand; -import org.joda.time.Duration; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -45,8 +38,14 @@ import org.apache.cloudstack.ca.SetupCertificateCommand; import org.apache.cloudstack.ca.SetupKeyStoreCommand; import org.apache.cloudstack.ca.SetupKeystoreAnswer; +import org.apache.cloudstack.diagnostics.DeleteFileInVrCommand; +import org.apache.cloudstack.diagnostics.DiagnosticsAnswer; +import org.apache.cloudstack.diagnostics.DiagnosticsCommand; +import org.apache.cloudstack.diagnostics.PrepareFilesAnswer; +import org.apache.cloudstack.diagnostics.PrepareFilesCommand; import org.apache.cloudstack.utils.security.KeyStoreUtils; import org.apache.log4j.Logger; +import org.joda.time.Duration; import com.cloud.agent.api.Answer; import com.cloud.agent.api.CheckRouterAnswer; @@ -59,6 +58,8 @@ import com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.AggregationControlCommand.Action; import com.cloud.agent.api.routing.GetRouterAlertsCommand; +import com.cloud.agent.api.routing.GetRouterMonitorResultsAnswer; +import com.cloud.agent.api.routing.GetRouterMonitorResultsCommand; import com.cloud.agent.api.routing.GroupAnswer; import com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.agent.resource.virtualnetwork.facade.AbstractConfigItemFacade; @@ -204,6 +205,8 @@ private Answer executeQueryCommand(NetworkElementCommand cmd) { return execute((PrepareFilesCommand) cmd); } else 
if (cmd instanceof DeleteFileInVrCommand) { return execute((DeleteFileInVrCommand)cmd); + } else if (cmd instanceof GetRouterMonitorResultsCommand) { + return execute((GetRouterMonitorResultsCommand)cmd); } else { s_logger.error("Unknown query command in VirtualRoutingResource!"); return Answer.createUnsupportedCommandAnswer(cmd); @@ -225,10 +228,7 @@ private ExecutionResult applyConfigToVR(String routerAccessIp, ConfigItem c, Dur throw new CloudRuntimeException("Unable to apply unknown configitem of type " + c.getClass().getSimpleName()); } - private Answer applyConfig(NetworkElementCommand cmd, List cfg) { - - if (cfg.isEmpty()) { return new Answer(cmd, true, "Nothing to do"); } @@ -256,7 +256,6 @@ private Answer applyConfig(NetworkElementCommand cmd, List cfg) { s_logger.warn("Expected " + cmd.getAnswersCount() + " answers while executing " + cmd.getClass().getSimpleName() + " but received " + results.size()); } - if (results.size() == 1) { return new Answer(cmd, finalResult, results.get(0).getDetails()); } else { @@ -275,6 +274,86 @@ private CheckS2SVpnConnectionsAnswer execute(CheckS2SVpnConnectionsCommand cmd) return new CheckS2SVpnConnectionsAnswer(cmd, result.isSuccess(), result.getDetails()); }
+    /**
+     * Splits one comma-separated line of check names into its trimmed, non-empty entries.
+     *
+     * @param line one line of the failing checks section
+     * @return the check names found on the line, in order; empty if there are none
+     */
+    private List<String> getFailingChecks(String line) {
+        List<String> failingChecks = new ArrayList<>();
+        for (String w : line.split(",")) {
+            String checkName = w.trim();
+            if (!checkName.isEmpty()) {
+                failingChecks.add(checkName);
+            }
+        }
+        return failingChecks;
+    }
+
+    /**
+     * Parses the router script output into an answer. A line containing "FAILING CHECKS" switches
+     * to reading comma-separated failing check names from the following lines; a line containing
+     * "MONITOR RESULTS" switches to appending the following lines (trimmed, no separator) to the
+     * monitoring results. Lines met before either marker are logged and skipped.
+     *
+     * @param cmd the command being answered
+     * @param executionResult the script output; must not be null
+     * @return a successful answer holding the failing checks and the monitoring results
+     */
+    private GetRouterMonitorResultsAnswer parseLinesForHealthChecks(GetRouterMonitorResultsCommand cmd, String executionResult) {
+        List<String> failingChecks = new ArrayList<>();
+        StringBuilder monitorResults = new StringBuilder();
+        // At most one of the two flags is ever true, so each branch only needs to test its own flag.
+        boolean readingFailedChecks = false;
+        boolean readingMonitorResults = false;
+        for (String rawLine : executionResult.trim().split("\n")) {
+            String line = rawLine.trim();
+            if (line.contains("FAILING CHECKS")) { // Toggle to reading failing checks from next line
+                readingFailedChecks = true;
+                readingMonitorResults = false;
+            } else if (line.contains("MONITOR RESULTS")) { // Toggle to reading monitor results from next line
+                readingFailedChecks = false;
+                readingMonitorResults = true;
+            } else if (readingFailedChecks) { // Reading failing checks section
+                failingChecks.addAll(getFailingChecks(line));
+            } else if (readingMonitorResults) { // Reading monitor checks result
+                monitorResults.append(line);
+            } else {
+                s_logger.error("Unexpected lines reached while parsing health check response. Skipping line:- " + line);
+            }
+        }
+
+        return new GetRouterMonitorResultsAnswer(cmd, true, failingChecks, monitorResults.toString());
+    }
+
+    /**
+     * Runs {@link VRScripts#ROUTER_MONITOR_RESULTS} on the router and turns its output into an answer.
+     *
+     * @param cmd the command; its fresh-checks flag is passed to the script as "true" or "false"
+     * @return the parsed results, or an unsuccessful answer when the script fails or returns no details
+     */
+    private GetRouterMonitorResultsAnswer execute(GetRouterMonitorResultsCommand cmd) {
+        String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);
+        String args = cmd.shouldPerformFreshChecks() ? "true" : "false";
+        s_logger.info("Fetching health check result for " + routerIp + " and executing fresh checks: " + args);
+        ExecutionResult result = _vrDeployer.executeInVR(routerIp, VRScripts.ROUTER_MONITOR_RESULTS, args);
+        String details = result.getDetails();
+
+        if (!result.isSuccess()) {
+            s_logger.warn("Result of " + cmd + " failed with details: " + details);
+            return new GetRouterMonitorResultsAnswer(cmd, false, null, details);
+        }
+
+        // A successful run without any details cannot be parsed; treat null like empty instead of throwing.
+        if (details == null || details.isEmpty()) {
+            s_logger.warn("Result of " + cmd + " received no details.");
+            return new GetRouterMonitorResultsAnswer(cmd, false, null, "No results available.");
+        }
+
+        return parseLinesForHealthChecks(cmd, details);
+    }
+
private GetRouterAlertsAnswer execute(GetRouterAlertsCommand cmd) { String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP); diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java index a083012021ff..1042d23e7b0c 100644 ---
a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java @@ -127,7 +127,10 @@ protected List generateConfigItems(final ConfigBase configuration) { final ConfigItem configFile = new FileConfigItem(VRScripts.CONFIG_PERSIST_LOCATION, remoteFilename, gson.toJson(configuration)); cfg.add(configFile); - final ConfigItem updateCommand = new ScriptConfigItem(VRScripts.UPDATE_CONFIG, remoteFilename); + // By default keep files in processed cache on VR + final String args = configuration.shouldDeleteFromProcessedCache() ? remoteFilename + " false" : remoteFilename; + + final ConfigItem updateCommand = new ScriptConfigItem(VRScripts.UPDATE_CONFIG, args); cfg.add(updateCommand); return cfg; diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java index 2cf03e445fc1..8ddf17b15b9c 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java @@ -21,21 +21,76 @@ import java.util.List; +import org.apache.log4j.Logger; + import com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.agent.api.routing.SetMonitorServiceCommand; import com.cloud.agent.resource.virtualnetwork.ConfigItem; +import com.cloud.agent.resource.virtualnetwork.ScriptConfigItem; import com.cloud.agent.resource.virtualnetwork.VRScripts; import com.cloud.agent.resource.virtualnetwork.model.ConfigBase; import com.cloud.agent.resource.virtualnetwork.model.MonitorService; public class SetMonitorServiceConfigItem extends AbstractConfigItemFacade { + private static final Logger s_logger = Logger.getLogger(SetMonitorServiceConfigItem.class); @Override public List generateConfig(final NetworkElementCommand cmd) { final SetMonitorServiceCommand command = (SetMonitorServiceCommand) cmd; - final MonitorService monitorService = new MonitorService(command.getConfiguration(), cmd.getAccessDetail(NetworkElementCommand.ROUTER_MONITORING_ENABLE)); - return generateConfigItems(monitorService);
+        final MonitorService monitorService = new MonitorService(
+                command.getConfiguration(),
+                cmd.getAccessDetail(SetMonitorServiceCommand.ROUTER_MONITORING_ENABLED),
+                cmd.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED));
+
+        setupHealthChecksRelatedInfo(monitorService, command);
+
+        monitorService.setDeleteFromProcessedCache(command.shouldDeleteFromProcessedCache());
+
+        List<ConfigItem> configItems = generateConfigItems(monitorService);
+        if (configItems != null && command.shouldReconfigureAfterUpdate()) {
+            // Queue a configure.py run on the monitor service file after the config update.
+            configItems.add(new ScriptConfigItem(VRScripts.CONFIGURE, "monitor_service.json"));
+        }
+        return configItems;
+    }
+
+    /**
+     * Copies the health check settings from the command onto the monitor service configuration.
+     * An interval that is missing or not an integer is logged and left unset.
+     */
+    private void setupHealthChecksRelatedInfo(MonitorService monitorService, SetMonitorServiceCommand command) {
+        Integer basicInterval = parseIntervalDetail(command, SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL, "basic");
+        if (basicInterval != null) {
+            monitorService.setHealthChecksBasicRunInterval(basicInterval);
+        }
+
+        Integer advancedInterval = parseIntervalDetail(command, SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL, "advanced");
+        if (advancedInterval != null) {
+            monitorService.setHealthChecksAdvancedRunInterval(advancedInterval);
+        }
+
+        monitorService.setExcludedHealthChecks(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED));
+        monitorService.setHealthChecksConfig(command.getHealthChecksConfig());
+    }
+
+    /**
+     * Reads an access detail of the command as an integer interval.
+     *
+     * @param command the command holding the access details
+     * @param detailKey name of the access detail to read
+     * @param intervalKind label used in the log message ("basic" or "advanced")
+     * @return the parsed interval, or null if the detail is missing or not an integer
+     */
+    private Integer parseIntervalDetail(SetMonitorServiceCommand command, String detailKey, String intervalKind) {
+        final String value = command.getAccessDetail(detailKey);
+        try {
+            return Integer.parseInt(value);
+        } catch (NumberFormatException exception) {
+            s_logger.error("Unexpected health check " + intervalKind + " interval set: " + value +
+                    ". Exception: " + exception + ". Will use default value");
+            return null;
+        }
} @Override diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java index edc721178cbe..51424ea3115d 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java @@ -41,6 +41,10 @@ public abstract class ConfigBase { private String type = UNKNOWN; + // For use in update_config.py which by default persists files in /var/cache/cloud/processed + // If true we don't keep the file in cache. Useful for monitor service command to avoid space waste + protected boolean deleteFromProcessedCache; + private ConfigBase() { // Empty constructor for (de)serialization } @@ -57,4 +61,7 @@ public void setType(final String type) { this.type = type; } + public boolean shouldDeleteFromProcessedCache() { + return deleteFromProcessedCache; + } } diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java index fdf9e473f35c..fe20476f076d 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java @@ -19,34 +19,84 @@ package com.cloud.agent.resource.virtualnetwork.model; +import java.util.Map; + public class MonitorService extends ConfigBase { public String config, disableMonitoring; + public Boolean healthChecksEnabled; + public Integer healthChecksBasicRunInterval; + public Integer healthChecksAdvancedRunInterval; + public String excludedHealthChecks;
+ public Map healthChecksConfig; /* filled from SetMonitorServiceCommand.getHealthChecksConfig() by SetMonitorServiceConfigItem */ public MonitorService() { super(ConfigBase.MONITORSERVICE); } - public MonitorService(String config, String disableMonitoring) { + public MonitorService(String config, String disableMonitoring, String healthChecksEnabled) { super(ConfigBase.MONITORSERVICE); this.config = config; this.disableMonitoring = disableMonitoring; + this.healthChecksEnabled = Boolean.parseBoolean(healthChecksEnabled); /* only "true" (ignoring case) gives true; null or any other text gives false */ } public String getConfig() { return config; } - public void setConfig(String config) { - this.config = config; - } - public String getDisableMonitoring() { return disableMonitoring; } + public Boolean getHealthChecksEnabled() { + return healthChecksEnabled; + } + + public Integer getHealthChecksBasicRunInterval() { + return healthChecksBasicRunInterval; + } + + public Integer getHealthChecksAdvancedRunInterval() { + return healthChecksAdvancedRunInterval; + } + + public String getExcludedHealthChecks() { + return excludedHealthChecks; + } + + public Map getHealthChecksConfig() { + return healthChecksConfig; + } + + public void setConfig(String config) { + this.config = config; + } + public void setDisableMonitoring(String disableMonitoring) { this.disableMonitoring = disableMonitoring; } + public void setHealthChecksEnabled(Boolean healthChecksEnabled) { + this.healthChecksEnabled = healthChecksEnabled; + } + + public void setHealthChecksBasicRunInterval(Integer healthChecksBasicRunInterval) { + this.healthChecksBasicRunInterval = healthChecksBasicRunInterval; + } + + public void setHealthChecksAdvancedRunInterval(Integer healthChecksAdvancedRunInterval) { + this.healthChecksAdvancedRunInterval = healthChecksAdvancedRunInterval; + } + + public void setExcludedHealthChecks(String excludedHealthChecks) { + this.excludedHealthChecks = excludedHealthChecks; + } + public void setHealthChecksConfig(Map healthChecksConfig) { + this.healthChecksConfig = healthChecksConfig; + } + + public void setDeleteFromProcessedCache(boolean
deleteFromProcessedCache) { + this.deleteFromProcessedCache = deleteFromProcessedCache; /* field is declared on ConfigBase and read back through shouldDeleteFromProcessedCache() */ + } } diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java index 6e71864c4473..9c4167ff47e0 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java @@ -30,7 +30,6 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; -import com.cloud.storage.VolumeApiService; import org.apache.cloudstack.api.command.admin.vm.MigrateVMCmd; import org.apache.cloudstack.api.command.admin.volume.MigrateVolumeCmdByAdmin; import org.apache.cloudstack.api.command.user.volume.MigrateVolumeCmd; @@ -106,6 +105,7 @@ import com.cloud.storage.VMTemplateStorageResourceAssoc; import com.cloud.storage.Volume; import com.cloud.storage.Volume.Type; +import com.cloud.storage.VolumeApiService; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.VolumeDao; diff --git a/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDao.java b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDao.java new file mode 100644 index 000000000000..66dcf68f81fd --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDao.java @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.network.dao; + +import java.util.List; + +import com.cloud.utils.db.GenericDao; +
+/**
+ * Data access for the router health check results stored as {@link RouterHealthCheckResultVO}.
+ */
+public interface RouterHealthCheckResultDao extends GenericDao<RouterHealthCheckResultVO, Long> {
+    /**
+     * @param routerId id of the router
+     * @return Returns all the health checks in the database for the given router id
+     */
+    List<RouterHealthCheckResultVO> getHealthCheckResults(long routerId);
+
+    /**
+     * Removes all health check results stored for a router.
+     *
+     * @param routerId id of the router
+     * @return true if at least one result was removed
+     */
+    boolean expungeHealthChecks(long routerId);
+
+    /**
+     * @param routerId id of the router
+     * @return true if there are checks that have been marked failed in the database
+     */
+    boolean hasFailingChecks(long routerId);
+
+    /**
+     * For a router, we have only one (check name, check type) possible as we keep the most
+     * recent check result. This method finds that last check result.
+     *
+     * @param routerId id of the router
+     * @param checkName name of the health check
+     * @param checkType type of the health check
+     * @return returns the check result for the routerId, check type and the check name,
+     *         or null if there is none
+     */
+    RouterHealthCheckResultVO getRouterHealthCheckResult(long routerId, String checkName, String checkType);
+}
diff --git a/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDaoImpl.java b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDaoImpl.java new file mode 100644 index 000000000000..991365b5f540 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDaoImpl.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.network.dao; + +import java.util.List; + +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; + +import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.SearchBuilder; +import com.cloud.utils.db.SearchCriteria; +
+/**
+ * {@link RouterHealthCheckResultDao} implementation on top of {@link GenericDaoBase}.
+ */
+@Component
+public class RouterHealthCheckResultDaoImpl extends GenericDaoBase<RouterHealthCheckResultVO, Long> implements RouterHealthCheckResultDao {
+    private final static Logger s_logger = Logger.getLogger(RouterHealthCheckResultDaoImpl.class);
+
+    /** Search on router id, check name and check type; used with the router id alone or with all three set. */
+    private final SearchBuilder<RouterHealthCheckResultVO> RouterChecksSearchBuilder;
+    /** Search on router id and check result. */
+    private final SearchBuilder<RouterHealthCheckResultVO> IsRouterFailingSearchBuilder;
+
+    /** Builds the two searches used by the query methods below. */
+    protected RouterHealthCheckResultDaoImpl() {
+        super();
+        RouterChecksSearchBuilder = createSearchBuilder();
+        RouterChecksSearchBuilder.and("routerId", RouterChecksSearchBuilder.entity().getRouterId(), SearchCriteria.Op.EQ);
+        RouterChecksSearchBuilder.and("checkName", RouterChecksSearchBuilder.entity().getCheckName(), SearchCriteria.Op.EQ);
+        RouterChecksSearchBuilder.and("checkType", RouterChecksSearchBuilder.entity().getCheckType(), SearchCriteria.Op.EQ);
+        RouterChecksSearchBuilder.done();
+
+        IsRouterFailingSearchBuilder = createSearchBuilder();
+        IsRouterFailingSearchBuilder.and("routerId", IsRouterFailingSearchBuilder.entity().getRouterId(), SearchCriteria.Op.EQ);
+        IsRouterFailingSearchBuilder.and("checkResult", IsRouterFailingSearchBuilder.entity().getCheckResult(), SearchCriteria.Op.EQ);
+        IsRouterFailingSearchBuilder.done();
+    }
+
+    @Override
+    public List<RouterHealthCheckResultVO> getHealthCheckResults(long routerId) {
+        SearchCriteria<RouterHealthCheckResultVO> sc = RouterChecksSearchBuilder.create();
+        sc.setParameters("routerId", routerId);
+        return listBy(sc);
+    }
+
+    @Override
+    public boolean expungeHealthChecks(long routerId) {
+        SearchCriteria<RouterHealthCheckResultVO> sc = RouterChecksSearchBuilder.create();
+        sc.setParameters("routerId", routerId);
+        return expunge(sc) > 0;
+    }
+
+    @Override
+    public RouterHealthCheckResultVO getRouterHealthCheckResult(long routerId, String checkName, String checkType) {
+        SearchCriteria<RouterHealthCheckResultVO> sc = RouterChecksSearchBuilder.create();
+        sc.setParameters("routerId", routerId);
+        sc.setParameters("checkName", checkName);
+        sc.setParameters("checkType", checkType);
+        List<RouterHealthCheckResultVO> checks = listBy(sc);
+        // More than one match is unexpected; it is logged and the first entry is returned anyway.
+        if (checks.size() > 1) {
+            s_logger.error("Found multiple entries for router Id: " + routerId + ", check name: " + checkName);
+        }
+        return checks.isEmpty() ? null : checks.get(0);
+    }
+
+    @Override
+    public boolean hasFailingChecks(long routerId) {
+        SearchCriteria<RouterHealthCheckResultVO> sc = IsRouterFailingSearchBuilder.create();
+        sc.setParameters("routerId", routerId);
+        sc.setParameters("checkResult", false);
+        return !listBy(sc).isEmpty();
+    }
+}
diff --git a/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java new file mode 100644 index 000000000000..9803ccb6a4bd --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.network.dao; + +import java.util.Date; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Table; +import javax.persistence.Temporal; +import javax.persistence.TemporalType; + +import com.cloud.network.RouterHealthCheckResult; +import com.cloud.utils.StringUtils; + +@Entity +@Table(name = "router_health_check") +public class RouterHealthCheckResultVO implements RouterHealthCheckResult { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id", updatable = false, nullable = false) + private long id; + + @Column(name = "router_id", updatable = false, nullable = false) + private long routerId; + + @Column(name = "check_name", updatable = false, nullable = false) + private String checkName; + + @Column(name = "check_type", updatable = false, nullable = false) + private String checkType; + + @Column(name = "check_result") + private boolean checkResult; + + @Temporal(TemporalType.TIMESTAMP) + @Column(name = "last_update", updatable = true, nullable = true) + private Date lastUpdateTime; + + @Column(name = "check_details", updatable = true, nullable = true) + private byte[] checkDetails; /* raw bytes; decoded to text by getParsedCheckDetails() */ + + protected RouterHealthCheckResultVO() { + } + + public RouterHealthCheckResultVO(long routerId, String checkName, String checkType) { /* sets the three fields that are mapped as non-updatable */ + this.routerId = routerId; + this.checkName = checkName; + this.checkType = checkType; + } + + public long getId() { + return id; + }
+ + @Override + public long getRouterId() { + return routerId; + } + + @Override + public String getCheckName() { + return checkName; + } + + @Override + public String getCheckType() { + return checkType; + } + + @Override + public boolean getCheckResult() { + return checkResult; + } + + @Override + public Date getLastUpdateTime() { + return lastUpdateTime; + } + + @Override + public String getParsedCheckDetails() { + return checkDetails != null ? new String(checkDetails, StringUtils.getPreferredCharset()) : ""; /* empty string, not null, when no details are stored */ + } + + public byte[] getCheckDetails() { + return checkDetails; /* the stored array itself, not a copy */ + } + + public void setCheckResult(boolean checkResult) { + this.checkResult = checkResult; + } + + public void setLastUpdateTime(Date lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + } + + public void setCheckDetails(byte[] checkDetails) { + this.checkDetails = checkDetails; /* keeps the caller's array reference */ + } + + @Override + public String toString() { + return super.toString() + + "- check type: " + checkType + + ",check name: " + checkName + + ", check result: " + checkResult + + ", check last update: " + lastUpdateTime + + ", details: " + getParsedCheckDetails(); + } +} diff --git a/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml b/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml index 3e0d67b61a49..34c356dab342 100644 --- a/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml +++ b/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml @@ -288,4 +288,5 @@ + diff --git a/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql b/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql index 63fdbab3799f..2dceb8a02138 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql @@ -36,3 +36,18 @@
UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=283 AND display_name= UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=284 AND display_name="Red Hat Enterprise Linux 7.5"; UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=285 AND display_name="Red Hat Enterprise Linux 7.6"; UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=286 AND display_name="Red Hat Enterprise Linux 8.0"; + +-- Create table for router health checks. Only the last result of each check is kept: the unique key allows one row per (router_id, check_name, check_type), and rows are deleted with their router (ON DELETE CASCADE). NOTE(review): the standalone router_id index looks redundant with the unique key's leading column - confirm before keeping. +CREATE TABLE `cloud`.`router_health_check` ( + `id` bigint unsigned NOT NULL auto_increment, + `router_id` bigint unsigned NOT NULL COMMENT 'router id', + `check_name` varchar(255) NOT NULL COMMENT 'name of the health check', + `check_type` varchar(255) NOT NULL COMMENT 'type of the health check', + `last_update` DATETIME NULL COMMENT 'last check update time', + `check_result` boolean NOT NULL COMMENT 'check executions success or failure', + `check_details` BLOB NULL COMMENT 'check result detailed message', + PRIMARY KEY (`id`), + CONSTRAINT `fk_router_health_checks__router_id` FOREIGN KEY (`router_id`) REFERENCES `domain_router`(`id`) ON DELETE CASCADE, + UNIQUE `i_router_health_checks__router_id__check_name__check_type`(`router_id`, `check_name`, `check_type`), + INDEX `i_router_health_checks__router_id`(`router_id`) +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; diff --git a/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java b/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java index 65fea0928935..74360fe9cb58 100644 --- a/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java +++ b/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java @@ -22,6 +22,17 @@ // Automatically generated by addcopyright.py at 04/03/2012 package
com.cloud.baremetal.networkservice; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import javax.naming.ConfigurationException; + +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.log4j.Logger; + import com.cloud.agent.IAgentControl; import com.cloud.agent.api.Answer; import com.cloud.agent.api.CheckNetworkAnswer; @@ -70,15 +81,6 @@ import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.PowerState; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.cloudstack.api.ApiConstants; -import org.apache.cloudstack.framework.config.dao.ConfigurationDao; -import org.apache.log4j.Logger; - -import javax.naming.ConfigurationException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; public class BareMetalResourceBase extends ManagerBase implements ServerResource { private static final Logger s_logger = Logger.getLogger(BareMetalResourceBase.class); diff --git a/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java b/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java index 979be732f2bb..038661b58adf 100644 --- a/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java +++ b/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java @@ -2085,6 +2085,11 @@ protected Answer execute(final SetMonitorServiceCommand cmd) { final String controlIp = getRouterSshControlIp(cmd); final String config = cmd.getConfiguration(); + if (org.apache.commons.lang.StringUtils.isBlank(config)) { + s_logger.error("SetMonitorServiceCommand should have config for this case"); + return new Answer(cmd, false, "SetMonitorServiceCommand failed due to 
missing config"); + } + final String args = String.format(" %s %s", "-c", config); final String command = String.format("%s%s %s", "/opt/cloud/bin/", VRScripts.MONITOR_SERVICE, args); diff --git a/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java b/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java index 95204900f72f..f9a478713c74 100644 --- a/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java +++ b/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java @@ -24,30 +24,6 @@ import javax.inject.Inject; -import junit.framework.TestCase; -import net.juniper.contrail.api.ApiConnector; -import net.juniper.contrail.api.ApiConnectorFactory; -import net.juniper.contrail.api.ApiConnectorMock; -import net.juniper.contrail.api.types.InstanceIp; -import net.juniper.contrail.api.types.NetworkIpam; -import net.juniper.contrail.api.types.Project; -import net.juniper.contrail.api.types.SubnetType; -import net.juniper.contrail.api.types.VirtualMachine; -import net.juniper.contrail.api.types.VirtualMachineInterface; -import net.juniper.contrail.api.types.VirtualNetwork; -import net.juniper.contrail.api.types.VnSubnetsType; - -import org.apache.log4j.Logger; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.context.support.AbstractApplicationContext; -import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; - import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseCmd; import 
org.apache.cloudstack.api.command.user.address.AssociateIPAddrCmd; @@ -58,6 +34,16 @@ import org.apache.cloudstack.api.command.user.project.DeleteProjectCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.log4j.Logger; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.context.support.AbstractApplicationContext; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import com.cloud.agent.AgentManager; import com.cloud.dc.DataCenter; @@ -84,6 +70,19 @@ import com.cloud.utils.mgmt.JmxUtil; import com.cloud.vm.VirtualMachineManager; +import junit.framework.TestCase; +import net.juniper.contrail.api.ApiConnector; +import net.juniper.contrail.api.ApiConnectorFactory; +import net.juniper.contrail.api.ApiConnectorMock; +import net.juniper.contrail.api.types.InstanceIp; +import net.juniper.contrail.api.types.NetworkIpam; +import net.juniper.contrail.api.types.Project; +import net.juniper.contrail.api.types.SubnetType; +import net.juniper.contrail.api.types.VirtualMachine; +import net.juniper.contrail.api.types.VirtualMachineInterface; +import net.juniper.contrail.api.types.VirtualNetwork; +import net.juniper.contrail.api.types.VnSubnetsType; + @RunWith(SpringJUnit4ClassRunner.class) @ContextConfiguration(locations = "classpath:/providerContext.xml") /** diff --git a/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java b/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java index 2cb4e8cc6721..8639db6cfca5 100644 --- a/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java +++ 
b/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java @@ -18,6 +18,23 @@ */ package org.apache.cloudstack.storage.datastore.provider; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.inject.Inject; + +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; +import org.apache.cloudstack.engine.subsystem.api.storage.HypervisorHostListener; +import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; +import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; +import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; +import org.apache.cloudstack.storage.datastore.util.DateraObject; +import org.apache.cloudstack.storage.datastore.util.DateraUtil; +import org.apache.log4j.Logger; + import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; import com.cloud.agent.api.ModifyStoragePoolAnswer; @@ -41,21 +58,6 @@ import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; -import org.apache.cloudstack.engine.subsystem.api.storage.HypervisorHostListener; -import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; -import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; -import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; -import org.apache.cloudstack.storage.datastore.util.DateraObject; -import org.apache.cloudstack.storage.datastore.util.DateraUtil; -import org.apache.log4j.Logger; - -import javax.inject.Inject; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; public class DateraHostListener implements HypervisorHostListener { private static final 
Logger s_logger = Logger.getLogger(DateraHostListener.class); diff --git a/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java b/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java index 19b678e4aeb4..aa277cd4dab5 100644 --- a/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java +++ b/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java @@ -24,6 +24,33 @@ import javax.inject.Inject; +import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; +import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult; +import org.apache.cloudstack.engine.subsystem.api.storage.DataObject; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreCapabilities; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; +import org.apache.cloudstack.engine.subsystem.api.storage.ObjectInDataStoreStateMachine; +import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver; +import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.TemplateInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.apache.cloudstack.framework.async.AsyncCompletionCallback; +import org.apache.cloudstack.storage.command.CommandResult; +import org.apache.cloudstack.storage.command.CreateObjectAnswer; +import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; +import 
org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreDao; +import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreVO; +import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailVO; +import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; +import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; +import org.apache.cloudstack.storage.datastore.util.SolidFireUtil; +import org.apache.cloudstack.storage.to.SnapshotObjectTO; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; + import com.cloud.agent.api.Answer; import com.cloud.agent.api.to.DataObjectType; import com.cloud.agent.api.to.DataStoreTO; @@ -39,12 +66,12 @@ import com.cloud.storage.ResizeVolumePayload; import com.cloud.storage.Snapshot.State; import com.cloud.storage.SnapshotVO; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStoragePoolVO; import com.cloud.storage.Volume; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; -import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.SnapshotDetailsDao; import com.cloud.storage.dao.SnapshotDetailsVO; @@ -57,36 +84,8 @@ import com.cloud.user.dao.AccountDao; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.exception.CloudRuntimeException; - import com.google.common.base.Preconditions; -import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; -import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult; -import org.apache.cloudstack.engine.subsystem.api.storage.DataObject; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreCapabilities; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; -import 
org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver; -import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.TemplateInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; -import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.ObjectInDataStoreStateMachine; -import org.apache.cloudstack.framework.async.AsyncCompletionCallback; -import org.apache.cloudstack.storage.command.CommandResult; -import org.apache.cloudstack.storage.command.CreateObjectAnswer; -import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; -import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreDao; -import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreVO; -import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailVO; -import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; -import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; -import org.apache.cloudstack.storage.datastore.util.SolidFireUtil; -import org.apache.cloudstack.storage.to.SnapshotObjectTO; -import org.apache.commons.lang.StringUtils; -import org.apache.log4j.Logger; - public class SolidFirePrimaryDataStoreDriver implements PrimaryDataStoreDriver { private static final Logger LOGGER = Logger.getLogger(SolidFirePrimaryDataStoreDriver.class); private static final int LOWEST_HYPERVISOR_SNAPSHOT_RESERVE = 10; diff --git a/server/src/main/java/com/cloud/api/ApiResponseHelper.java b/server/src/main/java/com/cloud/api/ApiResponseHelper.java index b8e60325ea22..c121bcb034f2 100644 --- a/server/src/main/java/com/cloud/api/ApiResponseHelper.java +++ b/server/src/main/java/com/cloud/api/ApiResponseHelper.java @@ -31,8 +31,6 @@ import javax.inject.Inject; -import com.cloud.vm.snapshot.VMSnapshotVO; -import com.cloud.vm.snapshot.dao.VMSnapshotDao; import 
org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.affinity.AffinityGroup; @@ -63,6 +61,7 @@ import org.apache.cloudstack.api.response.CreateSSHKeyPairResponse; import org.apache.cloudstack.api.response.DiskOfferingResponse; import org.apache.cloudstack.api.response.DomainResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.DomainRouterResponse; import org.apache.cloudstack.api.response.EventResponse; import org.apache.cloudstack.api.response.ExtractResponse; @@ -235,6 +234,7 @@ import com.cloud.network.PhysicalNetworkServiceProvider; import com.cloud.network.PhysicalNetworkTrafficType; import com.cloud.network.RemoteAccessVpn; +import com.cloud.network.RouterHealthCheckResult; import com.cloud.network.Site2SiteCustomerGateway; import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.Site2SiteVpnGateway; @@ -336,6 +336,8 @@ import com.cloud.vm.dao.NicExtraDhcpOptionDao; import com.cloud.vm.dao.NicSecondaryIpVO; import com.cloud.vm.snapshot.VMSnapshot; +import com.cloud.vm.snapshot.VMSnapshotVO; +import com.cloud.vm.snapshot.dao.VMSnapshotDao; public class ApiResponseHelper implements ResponseGenerator { @@ -1349,6 +1351,7 @@ public DomainRouterResponse createDomainRouterResponse(VirtualRouter router) { return listVrs.get(0); } + @Override public SystemVmResponse createSystemVmResponse(VirtualMachine vm) { SystemVmResponse vmResponse = new SystemVmResponse(); @@ -4205,4 +4208,20 @@ public ManagementServerResponse createManagementResponse(ManagementServerHost mg response.setState(mgmt.getState()); return response; } + + @Override + public List createHealthCheckResponse(VirtualMachine router, List healthCheckResults) { // builds one response object per given result, in input order; the router argument is not read in this method + List responses = new ArrayList<>(healthCheckResults.size()); + for (RouterHealthCheckResult hcResult : healthCheckResults) { + RouterHealthCheckResultResponse healthCheckResponse = 
new RouterHealthCheckResultResponse(); + healthCheckResponse.setObjectName("routerhealthchecks"); + healthCheckResponse.setCheckName(hcResult.getCheckName()); + healthCheckResponse.setCheckType(hcResult.getCheckType()); + healthCheckResponse.setResult(hcResult.getCheckResult()); + healthCheckResponse.setLastUpdated(hcResult.getLastUpdateTime()); + healthCheckResponse.setDetails(hcResult.getParsedCheckDetails()); // details are passed on in the result's parsed string form + responses.add(healthCheckResponse); + } + return responses; + } } diff --git a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java index 1ba083f88aca..f33f50eb3735 100644 --- a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java +++ b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java @@ -31,9 +31,6 @@ import javax.inject.Inject; -import com.cloud.agent.api.storage.OVFProperty; -import com.cloud.storage.TemplateOVFPropertyVO; -import com.cloud.storage.dao.TemplateOVFPropertiesDao; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.affinity.AffinityGroupDomainMapVO; import org.apache.cloudstack.affinity.AffinityGroupResponse; @@ -42,6 +39,7 @@ import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao; import org.apache.cloudstack.api.BaseListProjectAndAccountResourcesCmd; import org.apache.cloudstack.api.ResourceDetail; +import org.apache.cloudstack.api.ResponseGenerator; import org.apache.cloudstack.api.ResponseObject.ResponseView; import org.apache.cloudstack.api.command.admin.account.ListAccountsCmdByAdmin; import org.apache.cloudstack.api.command.admin.domain.ListDomainsCmd; @@ -51,6 +49,7 @@ import org.apache.cloudstack.api.command.admin.internallb.ListInternalLBVMsCmd; import org.apache.cloudstack.api.command.admin.iso.ListIsosCmdByAdmin; import org.apache.cloudstack.api.command.admin.management.ListMgmtsCmd; +import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd; import
org.apache.cloudstack.api.command.admin.router.ListRoutersCmd; import org.apache.cloudstack.api.command.admin.storage.ListImageStoresCmd; import org.apache.cloudstack.api.command.admin.storage.ListSecondaryStagingStoresCmd; @@ -98,6 +97,7 @@ import org.apache.cloudstack.api.response.ProjectResponse; import org.apache.cloudstack.api.response.ResourceDetailResponse; import org.apache.cloudstack.api.response.ResourceTagResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.SecurityGroupResponse; import org.apache.cloudstack.api.response.ServiceOfferingResponse; import org.apache.cloudstack.api.response.StoragePoolResponse; @@ -123,6 +123,7 @@ import org.apache.log4j.Logger; import org.springframework.stereotype.Component; +import com.cloud.agent.api.storage.OVFProperty; import com.cloud.api.query.dao.AccountJoinDao; import com.cloud.api.query.dao.AffinityGroupJoinDao; import com.cloud.api.query.dao.AsyncJobJoinDao; @@ -182,6 +183,10 @@ import com.cloud.ha.HighAvailabilityManager; import com.cloud.hypervisor.Hypervisor; import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.network.RouterHealthCheckResult; +import com.cloud.network.VpcVirtualNetworkApplianceService; +import com.cloud.network.dao.RouterHealthCheckResultDao; +import com.cloud.network.router.VirtualNetworkApplianceManager; import com.cloud.network.security.SecurityGroupVMMapVO; import com.cloud.network.security.dao.SecurityGroupVMMapDao; import com.cloud.org.Grouping; @@ -206,9 +211,11 @@ import com.cloud.storage.Storage.ImageFormat; import com.cloud.storage.Storage.TemplateType; import com.cloud.storage.StoragePoolTagVO; +import com.cloud.storage.TemplateOVFPropertyVO; import com.cloud.storage.VMTemplateVO; import com.cloud.storage.Volume; import com.cloud.storage.dao.StoragePoolTagsDao; +import com.cloud.storage.dao.TemplateOVFPropertiesDao; import com.cloud.storage.dao.VMTemplateDao; import 
com.cloud.tags.ResourceTagVO; import com.cloud.tags.dao.ResourceTagDao; @@ -395,6 +402,15 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q @Inject TemplateOVFPropertiesDao templateOVFPropertiesDao; + @Inject + public VpcVirtualNetworkApplianceService routerService; + + @Inject + private ResponseGenerator responseGenerator; + + @Inject + private RouterHealthCheckResultDao routerHealthCheckResultDao; + /* * (non-Javadoc) * @@ -1158,8 +1174,17 @@ public ListResponse searchForRouters(ListRoutersCmd cmd) { Pair, Integer> result = searchForRoutersInternal(cmd, cmd.getId(), cmd.getRouterName(), cmd.getState(), cmd.getZoneId(), cmd.getPodId(), cmd.getClusterId(), cmd.getHostId(), cmd.getKeyword(), cmd.getNetworkId(), cmd.getVpcId(), cmd.getForVpc(), cmd.getRole(), cmd.getVersion()); ListResponse response = new ListResponse(); - List routerResponses = ViewResponseHelper.createDomainRouterResponse(result.first().toArray(new DomainRouterJoinVO[result.first().size()])); + if (VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) { + for (DomainRouterResponse res : routerResponses) { + DomainRouterVO resRouter = _routerDao.findByUuid(res.getId()); + res.setHealthChecksFailed(routerHealthCheckResultDao.hasFailingChecks(resRouter.getId())); + if (cmd.shouldFetchHealthCheckResults()) { + res.setHealthCheckResults(responseGenerator.createHealthCheckResponse(resRouter, + new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(resRouter.getId())))); + } + } + } response.setResponses(routerResponses, result.second()); return response; } @@ -1169,8 +1194,18 @@ public ListResponse searchForInternalLbVms(ListInternalLBV Pair, Integer> result = searchForRoutersInternal(cmd, cmd.getId(), cmd.getRouterName(), cmd.getState(), cmd.getZoneId(), cmd.getPodId(), null, cmd.getHostId(), cmd.getKeyword(), cmd.getNetworkId(), cmd.getVpcId(), cmd.getForVpc(), cmd.getRole(), null); ListResponse response = new ListResponse(); - List 
routerResponses = ViewResponseHelper.createDomainRouterResponse(result.first().toArray(new DomainRouterJoinVO[result.first().size()])); + if (VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) { + for (DomainRouterResponse res : routerResponses) { + DomainRouterVO resRouter = _routerDao.findByUuid(res.getId()); + res.setHealthChecksFailed(routerHealthCheckResultDao.hasFailingChecks(resRouter.getId())); + if (cmd.shouldFetchHealthCheckResults()) { + res.setHealthCheckResults(responseGenerator.createHealthCheckResponse(resRouter, + new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(resRouter.getId())))); + } + } + } + response.setResponses(routerResponses, result.second()); return response; } @@ -3865,6 +3900,27 @@ public ListResponse listTemplateOVFProperties(ListT return response; } + @Override + public List listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd) { // returns the stored health check results for the router named by cmd, optionally running fresh checks first; throws CloudRuntimeException when checks are disabled, fresh checks fail, or nothing is stored + s_logger.info("Executing health check command " + cmd); + long routerId = cmd.getRouterId(); + if (!VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) { + throw new CloudRuntimeException("Router health checks are not enabled for router " + routerId); + } + + if (cmd.shouldPerformFreshChecks() && !routerService.performRouterHealthChecks(routerId)) { + throw new CloudRuntimeException("Unable to perform fresh checks on router."); + } + + List result = new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(routerId)); + if (result.isEmpty()) { // result is a freshly constructed ArrayList and can never be null, so only emptiness is checked + throw new CloudRuntimeException("Database had no entries for health checks for router. This could happen for " + + "a newly created router. 
Please wait for periodic results to populate or manually call for checks to execute."); + } + + return responseGenerator.createHealthCheckResponse(_routerDao.findById(routerId), result); + } + @Override public String getConfigComponentName() { return QueryService.class.getSimpleName(); diff --git a/server/src/main/java/com/cloud/network/NetworkServiceImpl.java b/server/src/main/java/com/cloud/network/NetworkServiceImpl.java index 1e9eb2d325f5..7761ce69ef39 100644 --- a/server/src/main/java/com/cloud/network/NetworkServiceImpl.java +++ b/server/src/main/java/com/cloud/network/NetworkServiceImpl.java @@ -1861,14 +1861,7 @@ public boolean deleteNetwork(long networkId, boolean forced) { @Override @ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true) - public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { - // This method restarts all network elements belonging to the network and re-applies all the rules - Long networkId = cmd.getNetworkId(); - - User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); - Account callerAccount = _accountMgr.getActiveAccountById(callerUser.getAccountId()); - - // Check if network exists + public boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { NetworkVO network = _networksDao.findById(networkId); if (network == null) { throwInvalidIdException("Network with specified id doesn't exist", networkId.toString(), "networkId"); @@ -1888,8 +1881,8 @@ public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean ma throw new InvalidParameterException("Unable to restart a running SDN network."); } + Account callerAccount =
_accountMgr.getActiveAccountById(user.getAccountId()); _accountMgr.checkAccess(callerAccount, null, true, network); - if (!network.isRedundant() && makeRedundant) { network.setRedundant(true); if (!_networksDao.update(network.getId(), network)) { @@ -1898,8 +1891,7 @@ public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean ma cleanup = true; } - boolean success = _networkMgr.restartNetwork(networkId, callerAccount, callerUser, cleanup); - + boolean success = _networkMgr.restartNetwork(networkId, callerAccount, user, cleanup); if (success) { s_logger.debug("Network id=" + networkId + " is restarted successfully."); } else { @@ -1909,6 +1901,17 @@ public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean ma return success; } + @Override + @ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true) + public boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { + // Command entry point: reads the network id and the cleanup/makeRedundant flags from cmd, resolves the calling user from CallContext, and delegates to restartNetwork(Long, boolean, boolean, User), which restarts the network elements and re-applies the rules + Long networkId = cmd.getNetworkId(); + boolean cleanup = cmd.getCleanup(); + boolean makeRedundant = cmd.getMakeRedundant(); + User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); + return restartNetwork(networkId, cleanup, makeRedundant, callerUser); + } + @Override public int getActiveNicsInNetwork(long networkId) { return _networksDao.getActiveNicsIn(networkId); diff --git a/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java b/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java index efab0e255689..56814daeef25 100644 --- a/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java +++ b/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java @@ -18,23 +18,21 @@ import java.util.ArrayList; import java.util.Collection;
+import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Collections; import javax.inject.Inject; import javax.naming.ConfigurationException; -import com.cloud.network.dao.FirewallRulesDcidrsDao; -import org.apache.log4j.Logger; -import org.springframework.stereotype.Component; - import org.apache.cloudstack.api.command.user.firewall.IListFirewallRulesCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; import com.cloud.configuration.Config; import com.cloud.domain.dao.DomainDao; @@ -55,6 +53,7 @@ import com.cloud.network.NetworkRuleApplier; import com.cloud.network.dao.FirewallRulesCidrsDao; import com.cloud.network.dao.FirewallRulesDao; +import com.cloud.network.dao.FirewallRulesDcidrsDao; import com.cloud.network.dao.IPAddressDao; import com.cloud.network.dao.IPAddressVO; import com.cloud.network.dao.NetworkDao; diff --git a/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java b/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java index da07bb5dff5d..18f4a45994e5 100644 --- a/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java +++ b/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java @@ -258,7 +258,7 @@ public VirtualRouter destroyRouter(final long routerId, final Account caller, fi @Override public boolean checkRouterVersion(final VirtualRouter router) { - if (!VirtualNetworkApplianceManagerImpl.routerVersionCheckEnabled.value()) { + if (!VirtualNetworkApplianceManager.RouterVersionCheckEnabled.value()) { // Router version check is disabled. 
return true; } diff --git a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java index a291b3590b25..eba23f3fe3df 100644 --- a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java +++ b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java @@ -45,6 +45,10 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA static final String SetServiceMonitorCK = "network.router.EnableServiceMonitoring"; static final String RouterAlertsCheckIntervalCK = "router.alerts.check.interval"; + static final String RouterHealthChecksConfigRefreshIntervalCK = "router.health.checks.config.refresh.interval"; + static final String RouterHealthChecksResultFetchIntervalCK = "router.health.checks.results.fetch.interval"; + static final String RouterHealthChecksFailuresToRecreateVrCK = "router.health.checks.failures.to.recreate.vr"; + static final ConfigKey RouterTemplateXen = new ConfigKey(String.class, RouterTemplateXenCK, "Advanced", "SystemVM Template (XenServer)", "Name of the default router template on Xenserver.", true, ConfigKey.Scope.Zone, null); static final ConfigKey RouterTemplateKvm = new ConfigKey(String.class, RouterTemplateKvmCK, "Advanced", "SystemVM Template (KVM)", @@ -63,11 +67,48 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA static final ConfigKey RouterAlertsCheckInterval = new ConfigKey(Integer.class, RouterAlertsCheckIntervalCK, "Advanced", "1800", "Interval (in seconds) to check for alerts in Virtual Router.", false, ConfigKey.Scope.Global, null); - static final ConfigKey routerVersionCheckEnabled = new ConfigKey("Advanced", Boolean.class, "router.version.check", "true", + static final ConfigKey RouterVersionCheckEnabled = new ConfigKey("Advanced", Boolean.class, "router.version.check", "true", "If true, router minimum required version 
is checked before sending command", false); static final ConfigKey UseExternalDnsServers = new ConfigKey(Boolean.class, "use.external.dns", "Advanced", "false", "Bypass internal dns, use external dns1 and dns2", true, ConfigKey.Scope.Zone, null); + // Health checks + static final ConfigKey RouterHealthChecksEnabled = new ConfigKey(Boolean.class, "router.health.checks.enabled", "Advanced", "true", + "If true, router health checks are allowed to be executed and read. If false, all scheduled checks and API calls for on demand checks are disabled.", + true, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksBasicInterval = new ConfigKey(Integer.class, "router.health.checks.basic.interval", "Advanced", "3", + "Interval in minutes at which basic router health checks are performed. If set to 0, no tests are scheduled.", + true, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksAdvancedInterval = new ConfigKey(Integer.class, "router.health.checks.advanced.interval", "Advanced", "10", + "Interval in minutes at which advanced router health checks are performed. If set to 0, no tests are scheduled.", + true, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksConfigRefreshInterval = new ConfigKey(Integer.class, RouterHealthChecksConfigRefreshIntervalCK, "Advanced", "10", + "Interval in minutes at which router health checks config - such as scheduling intervals, excluded checks, etc is updated on virtual routers by the management server. 
This value should" + + " be sufficiently high (like 2x) from the router.health.checks.basic.interval and router.health.checks.advanced.interval so that there is time between new results generation and results generation for passed data.", + false, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksResultFetchInterval = new ConfigKey(Integer.class, RouterHealthChecksResultFetchIntervalCK, "Advanced", "10", + "Interval in minutes at which router health checks results are fetched by management server. On each result fetch, management server evaluates need to recreate VR as per configuration of " + RouterHealthChecksFailuresToRecreateVrCK + + "This value should be sufficiently high (like 2x) from the router.health.checks.basic.interval and router.health.checks.advanced.interval so that there is time between new results generation and fetch.", + false, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksFailuresToRecreateVr = new ConfigKey(String.class, RouterHealthChecksFailuresToRecreateVrCK, "Advanced", "", + "Health checks failures defined by this config are the checks that should cause router recreation. If empty the recreate is not attempted for any health check failure. Possible values are comma separated script names " + + "from systemvm’s /root/health_scripts/ (namely - cpu_usage_check.py, dhcp_check.py, disk_space_check.py, dns_check.py, gateways_check.py, haproxy_check.py, iptables_check.py, memory_usage_check.py, router_version_check.py), connectivity.test " + + " or services (namely - loadbalancing.service, webserver.service, dhcp.service) ", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksToExclude = new ConfigKey(String.class, "router.health.checks.to.exclude", "Advanced", "", + "Health checks that should be excluded when executing scheduled checks on the router. This can be a comma separated list of script names placed in the '/root/health_checks/' folder. 
Currently the following scripts are " + + "placed in default systemvm template - cpu_usage_check.py, disk_space_check.py, gateways_check.py, iptables_check.py, router_version_check.py, dhcp_check.py, dns_check.py, haproxy_check.py, memory_usage_check.py.", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksFreeDiskSpaceThreshold = new ConfigKey(Double.class, "router.health.checks.free.disk.space.threshold", + "Advanced", "100", "Free disk space threshold (in MB) on VR below which the check is considered a failure.", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksMaxCpuUsageThreshold = new ConfigKey(Double.class, "router.health.checks.max.cpu.usage.threshold", + "Advanced", "100", " Max CPU Usage threshold as % above which check is considered a failure.", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksMaxMemoryUsageThreshold = new ConfigKey(Double.class, "router.health.checks.max.memory.usage.threshold", + "Advanced", "100", "Max Memory Usage threshold as % above which check is considered a failure.", + true, ConfigKey.Scope.Zone, null); + public static final int DEFAULT_ROUTER_VM_RAMSIZE = 256; // 256M public static final int DEFAULT_ROUTER_CPU_MHZ = 500; // 500 MHz public static final boolean USE_POD_VLAN = false; diff --git a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index c8ea47b31db1..87933456de92 100644 --- a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -17,6 +17,7 @@ package com.cloud.network.router; +import java.lang.reflect.Type; import java.math.BigInteger; import java.nio.charset.Charset; import java.security.MessageDigest; @@ -24,7 +25,9 @@ import java.text.ParseException; import 
java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Calendar; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -42,11 +45,6 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; -import org.apache.log4j.Logger; -import org.cloud.network.router.deployment.RouterDeploymentDefinitionBuilder; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; - import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.alert.AlertService.AlertType; import org.apache.cloudstack.api.command.admin.router.RebootRouterCmd; @@ -61,11 +59,18 @@ import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.jobs.AsyncJobManager; import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO; +import org.apache.cloudstack.lb.ApplicationLoadBalancerRuleVO; +import org.apache.cloudstack.lb.dao.ApplicationLoadBalancerRuleDao; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.network.topology.NetworkTopology; import org.apache.cloudstack.network.topology.NetworkTopologyContext; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.usage.UsageUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.cloud.network.router.deployment.RouterDeploymentDefinitionBuilder; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import com.cloud.agent.AgentManager; import com.cloud.agent.Listener; @@ -87,6 +92,9 @@ import com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.AggregationControlCommand.Action; import com.cloud.agent.api.routing.GetRouterAlertsCommand; +import 
com.cloud.agent.api.routing.GetRouterMonitorResultsAnswer; +import com.cloud.agent.api.routing.GetRouterMonitorResultsCommand; +import com.cloud.agent.api.routing.GroupAnswer; import com.cloud.agent.api.routing.IpAliasTO; import com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.agent.api.routing.SetMonitorServiceCommand; @@ -95,6 +103,10 @@ import com.cloud.alert.AlertManager; import com.cloud.api.ApiAsyncJobDispatcher; import com.cloud.api.ApiGsonHelper; +import com.cloud.api.query.dao.DomainRouterJoinDao; +import com.cloud.api.query.dao.UserVmJoinDao; +import com.cloud.api.query.vo.DomainRouterJoinVO; +import com.cloud.api.query.vo.UserVmJoinVO; import com.cloud.cluster.ManagementServerHostVO; import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.configuration.Config; @@ -109,7 +121,9 @@ import com.cloud.dc.dao.HostPodDao; import com.cloud.dc.dao.VlanDao; import com.cloud.deploy.DeployDestination; +import com.cloud.domain.Domain; import com.cloud.event.ActionEvent; +import com.cloud.event.ActionEventUtils; import com.cloud.event.EventTypes; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.ConcurrentOperationException; @@ -135,6 +149,7 @@ import com.cloud.network.Networks.TrafficType; import com.cloud.network.PublicIpAddress; import com.cloud.network.RemoteAccessVpn; +import com.cloud.network.RouterHealthCheckResult; import com.cloud.network.Site2SiteCustomerGateway; import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.SshKeysDistriMonitor; @@ -144,8 +159,11 @@ import com.cloud.network.dao.FirewallRulesDao; import com.cloud.network.dao.IPAddressDao; import com.cloud.network.dao.IPAddressVO; +import com.cloud.network.dao.LBStickinessPolicyDao; +import com.cloud.network.dao.LBStickinessPolicyVO; import com.cloud.network.dao.LoadBalancerDao; import com.cloud.network.dao.LoadBalancerVMMapDao; +import com.cloud.network.dao.LoadBalancerVMMapVO; import 
com.cloud.network.dao.LoadBalancerVO; import com.cloud.network.dao.MonitoringServiceDao; import com.cloud.network.dao.MonitoringServiceVO; @@ -155,6 +173,8 @@ import com.cloud.network.dao.OpRouterMonitorServiceVO; import com.cloud.network.dao.PhysicalNetworkServiceProviderDao; import com.cloud.network.dao.RemoteAccessVpnDao; +import com.cloud.network.dao.RouterHealthCheckResultDao; +import com.cloud.network.dao.RouterHealthCheckResultVO; import com.cloud.network.dao.Site2SiteCustomerGatewayDao; import com.cloud.network.dao.Site2SiteVpnConnectionDao; import com.cloud.network.dao.Site2SiteVpnConnectionVO; @@ -175,12 +195,14 @@ import com.cloud.network.rules.FirewallRuleVO; import com.cloud.network.rules.LoadBalancerContainer.Scheme; import com.cloud.network.rules.PortForwardingRule; +import com.cloud.network.rules.PortForwardingRuleVO; import com.cloud.network.rules.RulesManager; import com.cloud.network.rules.StaticNat; import com.cloud.network.rules.StaticNatImpl; import com.cloud.network.rules.StaticNatRule; import com.cloud.network.rules.dao.PortForwardingRulesDao; import com.cloud.network.vpc.Vpc; +import com.cloud.network.vpc.VpcService; import com.cloud.network.vpc.dao.VpcDao; import com.cloud.network.vpn.Site2SiteVpnManager; import com.cloud.offering.NetworkOffering; @@ -188,6 +210,7 @@ import com.cloud.offerings.NetworkOfferingVO; import com.cloud.offerings.dao.NetworkOfferingDao; import com.cloud.resource.ResourceManager; +import com.cloud.serializer.GsonHelper; import com.cloud.server.ConfigurationServer; import com.cloud.service.ServiceOfferingVO; import com.cloud.service.dao.ServiceOfferingDao; @@ -214,6 +237,7 @@ import com.cloud.utils.db.Filter; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.db.QueryBuilder; +import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.Transaction; import com.cloud.utils.db.TransactionCallbackNoReturn; @@ -243,6 +267,8 @@ import com.cloud.vm.dao.UserVmDao; 
import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.VMInstanceDao; +import com.google.gson.JsonSyntaxException; +import com.google.gson.reflect.TypeToken; /** * VirtualNetworkApplianceManagerImpl manages the different types of virtual @@ -251,6 +277,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements VirtualNetworkApplianceManager, VirtualNetworkApplianceService, VirtualMachineGuru, Listener, Configurable, StateListener { private static final Logger s_logger = Logger.getLogger(VirtualNetworkApplianceManagerImpl.class); + private static final String CONNECTIVITY_TEST = "connectivity.test"; @Inject private EntityManager _entityMgr; @Inject private DataCenterDao _dcDao; @@ -272,12 +299,12 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V @Inject private AccountManager _accountMgr; @Inject private ConfigurationManager _configMgr; @Inject private ConfigurationServer _configServer; - @Inject private ServiceOfferingDao _serviceOfferingDao; + @Inject protected ServiceOfferingDao _serviceOfferingDao; @Inject private UserVmDao _userVmDao; @Inject private VMInstanceDao _vmDao; @Inject private NetworkOfferingDao _networkOfferingDao; @Inject private GuestOSDao _guestOSDao; - @Inject private NetworkOrchestrationService _networkMgr; + @Inject protected NetworkOrchestrationService _networkMgr; @Inject protected NetworkModel _networkModel; @Inject protected VirtualMachineManager _itMgr; @Inject private VpnUserDao _vpnUsersDao; @@ -303,7 +330,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V @Inject private NetworkService _networkSvc; @Inject private IpAddressManager _ipAddrMgr; @Inject private ConfigDepot _configDepot; - @Inject private MonitoringServiceDao _monitorServiceDao; + @Inject protected MonitoringServiceDao _monitorServiceDao; @Inject private AsyncJobManager _asyncMgr; @Inject protected VpcDao _vpcDao; @Inject protected ApiAsyncJobDispatcher 
_asyncDispatcher; @@ -311,6 +338,16 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V @Inject protected NetworkTopologyContext _networkTopologyContext; + @Inject private UserVmJoinDao userVmJoinDao; + @Inject private DomainRouterJoinDao domainRouterJoinDao; + @Inject private PortForwardingRulesDao portForwardingDao; + @Inject private ApplicationLoadBalancerRuleDao applicationLoadBalancerRuleDao; + @Inject private RouterHealthCheckResultDao routerHealthCheckResultDao; + @Inject private LBStickinessPolicyDao lbStickinessPolicyDao; + + @Inject private NetworkService networkService; + @Inject private VpcService vpcService; + @Autowired @Qualifier("networkHelper") protected NetworkHelper _nwHelper; @@ -496,12 +533,6 @@ public VirtualRouter rebootRouter(final long routerId, final boolean reprogramNe } } - static final ConfigKey UseExternalDnsServers = new ConfigKey(Boolean.class, "use.external.dns", "Advanced", "false", - "Bypass internal dns, use external dns1 and dns2", true, ConfigKey.Scope.Zone, null); - - static final ConfigKey routerVersionCheckEnabled = new ConfigKey("Advanced", Boolean.class, "router.version.check", "true", - "If true, router minimum required version is checked before sending command", false); - @Override public boolean configure(final String name, final Map params) throws ConfigurationException { @@ -658,7 +689,21 @@ public boolean start() { if (routerAlertsCheckInterval > 0) { _checkExecutor.scheduleAtFixedRate(new CheckRouterAlertsTask(), routerAlertsCheckInterval, routerAlertsCheckInterval, TimeUnit.SECONDS); } else { - s_logger.debug("router.alerts.check.interval - " + routerAlertsCheckInterval + " so not scheduling the router alerts checking thread"); + s_logger.debug(RouterAlertsCheckIntervalCK + "=" + routerAlertsCheckInterval + " so not scheduling the router alerts checking thread"); + } + + final int routerHealthCheckConfigRefreshInterval = RouterHealthChecksConfigRefreshInterval.value(); + if 
(routerHealthCheckConfigRefreshInterval > 0) { + _checkExecutor.scheduleAtFixedRate(new UpdateRouterHealthChecksConfigTask(), routerHealthCheckConfigRefreshInterval, routerHealthCheckConfigRefreshInterval, TimeUnit.MINUTES); + } else { + s_logger.debug(RouterHealthChecksConfigRefreshIntervalCK + "=" + routerHealthCheckConfigRefreshInterval + " so not scheduling the router health check data thread"); + } + + final int routerHealthChecksFetchInterval = RouterHealthChecksResultFetchInterval.value(); + if (routerHealthChecksFetchInterval > 0) { + _checkExecutor.scheduleAtFixedRate(new FetchRouterHealthChecksResultTask(), routerHealthChecksFetchInterval, routerHealthChecksFetchInterval, TimeUnit.MINUTES); + } else { + s_logger.debug(RouterHealthChecksResultFetchIntervalCK + "=" + routerHealthChecksFetchInterval + " so not scheduling the router checks fetching thread"); } return true; @@ -1186,6 +1231,599 @@ protected void pushToUpdateQueue(final List networks) throws Interrup } } + protected class FetchRouterHealthChecksResultTask extends ManagedContextRunnable { + public FetchRouterHealthChecksResultTask() { + } + + @Override + protected void runInContext() { + try { + final List routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId); + s_logger.info("Found " + routers.size() + " running routers. Fetching, analysing and updating DB for the health checks."); + if (!RouterHealthChecksEnabled.value()) { + s_logger.debug("Skipping fetching of router health check results as router.health.checks.enabled is disabled"); + return; + } + + for (final DomainRouterVO router : routers) { + GetRouterMonitorResultsAnswer answer = fetchAndUpdateRouterHealthChecks(router, false); + List failingChecks = getFailingChecks(router, answer); + handleFailingChecks(router, failingChecks); + } + } catch (final Exception ex) { + s_logger.error("Fail to complete the FetchRouterHealthChecksResultTask! 
", ex); + ex.printStackTrace(); + } + } + + private List getFailingChecks(DomainRouterVO router, GetRouterMonitorResultsAnswer answer) { + + if (answer == null) { + s_logger.warn("Unable to fetch monitor results for router " + router); + resetRouterHealthChecksAndConnectivity(router.getId(), false, "Communication failed"); + return Arrays.asList(CONNECTIVITY_TEST); + } else if (!answer.getResult()) { + s_logger.warn("Failed to fetch monitor results from router " + router + " with details: " + answer.getDetails()); + resetRouterHealthChecksAndConnectivity(router.getId(), false, "Failed to fetch results with details: " + answer.getDetails()); + return Arrays.asList(CONNECTIVITY_TEST); + } else { + resetRouterHealthChecksAndConnectivity(router.getId(), true, "Successfully fetched data"); + updateDbHealthChecksFromRouterResponse(router.getId(), answer.getMonitoringResults()); + return answer.getFailingChecks(); + } + } + + private void handleFailingChecks(DomainRouterVO router, List failingChecks) { + if (failingChecks == null || failingChecks.size() == 0) { + return; + } + + String alertMessage = "Health checks failed: " + failingChecks.size() + " failing checks on router " + router.getUuid(); + _alertMgr.sendAlert(AlertType.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterId(), router.getPodIdToDeployIn(), + alertMessage, alertMessage); + s_logger.warn(alertMessage + ". 
Checking failed health checks to see if router needs recreate"); + + String checkFailsToRecreateVr = RouterHealthChecksFailuresToRecreateVr.valueIn(router.getDataCenterId()); + StringBuilder failingChecksEvent = new StringBuilder(); + boolean recreateRouter = false; + for (int i = 0; i < failingChecks.size(); i++) { + String failedCheck = failingChecks.get(i); + if (i == 0) { + failingChecksEvent.append("Router ") + .append(router.getUuid()) + .append(" has failing checks: "); + } + + failingChecksEvent.append(failedCheck); + if (i < failingChecks.size() - 1) { + failingChecksEvent.append(", "); + } + + if (StringUtils.isNotBlank(checkFailsToRecreateVr) && checkFailsToRecreateVr.contains(failedCheck)) { + recreateRouter = true; + } + } + + ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM, + Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, failingChecksEvent.toString()); + + if (recreateRouter) { + s_logger.warn("Health Check Alert: Found failing checks in " + + RouterHealthChecksFailuresToRecreateVrCK + ", attempting recreating router."); + recreateRouter(router.getId()); + } + } + } + + private DomainRouterJoinVO getAnyRouterJoinWithVpc(long routerId) { + List routerJoinVOs = domainRouterJoinDao.searchByIds(routerId); + for (DomainRouterJoinVO router : routerJoinVOs) { + if (router.getRemoved() == null && router.getVpcId() != 0) { + return router; + } + } + return null; + } + + private boolean restartVpcInDomainRouter(DomainRouterJoinVO router, User user) { + try { + s_logger.debug("Attempting restart VPC " + router.getVpcName() + " for router recreation " + router.getUuid()); + ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM, + Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, + "Recreating router " + router.getUuid() + " by restarting VPC " + router.getVpcUuid()); + return vpcService.restartVpc(router.getVpcId(), true, false, user); + } catch (Exception e) { + s_logger.error("Failed to restart 
VPC for router recreation " + + router.getVpcName() + " ,router " + router.getUuid(), e); + return false; + } + } + + private DomainRouterJoinVO getAnyRouterJoinWithGuestTraffic(long routerId) { + List routerJoinVOs = domainRouterJoinDao.searchByIds(routerId); + for (DomainRouterJoinVO router : routerJoinVOs) { + if (router.getRemoved() == null && router.getTrafficType() == TrafficType.Guest) { + return router; + } + } + return null; + } + + private boolean restartGuestNetworkInDomainRouter(DomainRouterJoinVO router, User user) { + try { + s_logger.info("Attempting restart network " + router.getNetworkName() + " for router recreation " + router.getUuid()); + ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM, + Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, + "Recreating router " + router.getUuid() + " by restarting network " + router.getNetworkUuid()); + return networkService.restartNetwork(router.getNetworkId(), true, false, user); + } catch (Exception e) { + s_logger.error("Failed to restart network " + router.getNetworkName() + + " for router recreation " + router.getNetworkName(), e); + return false; + } + } + + /** + * Attempts recreation of router by restarting with cleanup a VPC if any or a guest network associated in case no VPC. + * @param routerId - the id of the router to be recreated. + * @return true if successfully restart is attempted else false. + */ + private boolean recreateRouter(long routerId) { + User systemUser = _userDao.getUser(User.UID_SYSTEM); + + // Find any VPC containing router join VO, restart it and return + DomainRouterJoinVO routerJoinToRestart = getAnyRouterJoinWithVpc(routerId); + if (routerJoinToRestart != null) { + return restartVpcInDomainRouter(routerJoinToRestart, systemUser); + } + + // If no VPC containing router join VO was found we look for a guest network traffic containing join VO and restart that. 
+ routerJoinToRestart = getAnyRouterJoinWithGuestTraffic(routerId); + if (routerJoinToRestart != null) { + return restartGuestNetworkInDomainRouter(routerJoinToRestart, systemUser); + } + + s_logger.warn("Unable to find a valid guest network or VPC to restart for recreating router id " + routerId); + return false; + } + + private Map> getHealthChecksFromDb(long routerId) { + List healthChecksList = routerHealthCheckResultDao.getHealthCheckResults(routerId); + Map> healthCheckResults = new HashMap<>(); + if (healthChecksList.isEmpty()) { + return healthCheckResults; + } + + for (RouterHealthCheckResultVO healthCheck : healthChecksList) { + if (!healthCheckResults.containsKey(healthCheck.getCheckType())) { + healthCheckResults.put(healthCheck.getCheckType(), new HashMap<>()); + } + healthCheckResults.get(healthCheck.getCheckType()).put(healthCheck.getCheckName(), healthCheck); + } + + return healthCheckResults; + } + + private RouterHealthCheckResultVO resetRouterHealthChecksAndConnectivity(final long routerId, boolean connected, String message) { + routerHealthCheckResultDao.expungeHealthChecks(routerId); + boolean newEntry = false; + RouterHealthCheckResultVO connectivityVO = routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic"); + if (connectivityVO == null) { + connectivityVO = new RouterHealthCheckResultVO(routerId, CONNECTIVITY_TEST, "basic"); + newEntry = true; + } + + connectivityVO.setCheckResult(connected); + connectivityVO.setLastUpdateTime(new Date()); + if (StringUtils.isNotEmpty(message)) { + connectivityVO.setCheckDetails(message.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); + } + + if (newEntry) { + routerHealthCheckResultDao.persist(connectivityVO); + } else { + routerHealthCheckResultDao.update(connectivityVO.getId(), connectivityVO); + } + + return routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic"); + } + + private RouterHealthCheckResultVO 
parseHealthCheckVOFromJson(final long routerId, + final String checkName, final String checkType, final Map checkData, + final Map> checksInDb) { + boolean success = Boolean.parseBoolean(checkData.get("success")); + Date lastUpdate = new Date(Long.parseLong(checkData.get("lastUpdate"))); + double lastRunDuration = Double.parseDouble(checkData.get("lastRunDuration")); + String message = checkData.get("message"); + final RouterHealthCheckResultVO hcVo; + boolean newEntry = false; + if (checksInDb.containsKey(checkType) && checksInDb.get(checkType).containsKey(checkName)) { + hcVo = checksInDb.get(checkType).get(checkName); + } else { + hcVo = new RouterHealthCheckResultVO(routerId, checkName, checkType); + newEntry = true; + } + + hcVo.setCheckResult(success); + hcVo.setLastUpdateTime(lastUpdate); + if (StringUtils.isNotEmpty(message)) { + hcVo.setCheckDetails(message.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); + } + + if (newEntry) { + routerHealthCheckResultDao.persist(hcVo); + } else { + routerHealthCheckResultDao.update(hcVo.getId(), hcVo); + } + s_logger.info("Found health check " + hcVo + " which took running duration (ms) " + lastRunDuration); + return hcVo; + } + + /** + * + * @param checksJson JSON expected is + * { + * checkType1: { + * checkName1: { + * success: true/false, + * lastUpdate: date string, + * lastRunDuration: ms spent on test, + * message: detailed message from check execution + * }, + * checkType2: ..... + * }, + * checkType2: ...... + * } + * @return converts the above JSON into list of RouterHealthCheckResult. 
+ */ + private List parseHealthCheckResults( + final Map>> checksJson, final long routerId) { + final Map> checksInDb = getHealthChecksFromDb(routerId); + List healthChecks = new ArrayList<>(); + final String lastRunKey = "lastRun"; + for (String checkType : checksJson.keySet()) { + if (checksJson.get(checkType).containsKey(lastRunKey)) { // Log last run of this check type run info + Map lastRun = checksJson.get(checkType).get(lastRunKey); + s_logger.info("Found check types executed on VR " + checkType + ", start: " + lastRun.get("start") + + ", end: " + lastRun.get("end") + ", duration: " + lastRun.get("duration")); + } + + for (String checkName : checksJson.get(checkType).keySet()) { + if (lastRunKey.equals(checkName)) { + continue; + } + + try { + final RouterHealthCheckResultVO hcVo = parseHealthCheckVOFromJson( + routerId, checkName, checkType, checksJson.get(checkType).get(checkName), checksInDb); + healthChecks.add(hcVo); + } catch (Exception ex) { + s_logger.error("Skipping health check: Exception while parsing check result data for router id " + routerId + + ", check type: " + checkType + ", check name: " + checkName + ":" + ex.getLocalizedMessage(), ex); + } + } + } + return healthChecks; + } + + private List updateDbHealthChecksFromRouterResponse(final long routerId, final String monitoringResult) { + if (StringUtils.isBlank(monitoringResult)) { + s_logger.warn("Attempted parsing empty monitoring results string for router " + routerId); + return Collections.emptyList(); + } + + try { + s_logger.debug("Parsing and updating DB health check data for router: " + routerId + " with data: " + monitoringResult) ; + final Type t = new TypeToken>>>() {}.getType(); + final Map>> checks = GsonHelper.getGson().fromJson(monitoringResult, t); + return parseHealthCheckResults(checks, routerId); + } catch (JsonSyntaxException ex) { + s_logger.error("Unable to parse the result of health checks due to " + ex.getLocalizedMessage(), ex); + } + + return 
Collections.emptyList(); + } + + private GetRouterMonitorResultsAnswer fetchAndUpdateRouterHealthChecks(DomainRouterVO router, boolean performFreshChecks) { + if (!RouterHealthChecksEnabled.value()) { + return null; + } + + String controlIP = getRouterControlIP(router); + if (StringUtils.isNotBlank(controlIP) && !controlIP.equals("0.0.0.0")) { + final GetRouterMonitorResultsCommand command = new GetRouterMonitorResultsCommand(performFreshChecks); + command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlIP); + command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + try { + final Answer answer = _agentMgr.easySend(router.getHostId(), command); + + if (answer == null) { + s_logger.warn("Unable to fetch monitoring results data from router " + router.getHostName()); + return null; + } + if (answer instanceof GetRouterMonitorResultsAnswer) { + return (GetRouterMonitorResultsAnswer) answer; + } else { + s_logger.warn("Unable to fetch health checks results to router " + router.getHostName() + " Received answer " + answer.getDetails()); + return new GetRouterMonitorResultsAnswer(command, false, null, answer.getDetails()); + } + } catch (final Exception e) { + s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName(), e); + return null; + } + } + + return null; + } + + @Override + public boolean performRouterHealthChecks(long routerId) { + DomainRouterVO router = _routerDao.findById(routerId); + + if (router == null) { + throw new CloudRuntimeException("Unable to find router with id " + routerId); + } + + if (!RouterHealthChecksEnabled.value()) { + throw new CloudRuntimeException("Router health checks are not enabled for router: " + router); + } + + s_logger.info("Running health check results for router " + router.getUuid()); + + final GetRouterMonitorResultsAnswer answer; + boolean success = true; + // Step 1: Update health check data on router and perform and retrieve health checks on router + if 
(!updateRouterHealthChecksConfig(router)) { + s_logger.warn("Unable to update health check config for fresh run successfully for router: " + router + ", so trying to fetch last result."); + success = false; + answer = fetchAndUpdateRouterHealthChecks(router, false); + } else { + s_logger.info("Successfully updated health check config for fresh run successfully for router: " + router); + answer = fetchAndUpdateRouterHealthChecks(router, true); + } + + // Step 2: Update health checks values in database. We do this irrespective of new health check config. + if (answer == null || !answer.getResult()) { + success = false; + resetRouterHealthChecksAndConnectivity(routerId, false, + answer == null ? "Communication failed " : "Failed to fetch results with details: " + answer.getDetails()); + } else { + resetRouterHealthChecksAndConnectivity(routerId, true, "Successfully fetched data"); + updateDbHealthChecksFromRouterResponse(routerId, answer.getMonitoringResults()); + } + + return success; + } + + protected class UpdateRouterHealthChecksConfigTask extends ManagedContextRunnable { + public UpdateRouterHealthChecksConfigTask() { + } + + @Override + protected void runInContext() { + try { + final List routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId); + s_logger.debug("Found " + routers.size() + " running routers. "); + + for (final DomainRouterVO router : routers) { + updateRouterHealthChecksConfig(router); + } + } catch (final Exception ex) { + s_logger.error("Fail to complete the UpdateRouterHealthChecksConfigTask! 
", ex); + } + } + } + + private SetMonitorServiceCommand createMonitorServiceCommand(DomainRouterVO router, List services, + boolean reconfigure, boolean deleteFromProcessedCache) { + final SetMonitorServiceCommand command = new SetMonitorServiceCommand(services); + command.setAccessDetail(NetworkElementCommand.ROUTER_IP, getRouterControlIP(router)); + command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED, RouterHealthChecksEnabled.value().toString()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL, RouterHealthChecksBasicInterval.value().toString()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL, RouterHealthChecksAdvancedInterval.value().toString()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED, RouterHealthChecksToExclude.valueIn(router.getDataCenterId())); + command.setHealthChecksConfig(getRouterHealthChecksConfig(router)); + command.setReconfigureAfterUpdate(reconfigure); + command.setDeleteFromProcessedCache(deleteFromProcessedCache); // As part of updating + return command; + } + + /** + * Updates router health check config to the virtual router that it uses for health checks. + * @param router - the router ID that data needs to be sent to. 
+ * @return success of whether data was sent or not + */ + private boolean updateRouterHealthChecksConfig(DomainRouterVO router) { + if (!RouterHealthChecksEnabled.value()) { + return false; + } + + SetMonitorServiceCommand command = createMonitorServiceCommand(router, null,true, true); + String controlIP = getRouterControlIP(router); + if (StringUtils.isBlank(controlIP) || controlIP.equals("0.0.0.0")) { + s_logger.debug("Skipping update data on router " + router.getUuid() + " because controlIp is not correct."); + return false; + } + + s_logger.info("Updating data for router health checks for router " + router.getUuid()); + Answer origAnswer = null; + try { + origAnswer = _agentMgr.easySend(router.getHostId(), command); + } catch (final Exception e) { + s_logger.error("Error while sending update data for health check to router: " + router.getInstanceName(), e); + return false; + } + + if (origAnswer == null) { + s_logger.error("Unable to update health checks data to router " + router.getHostName()); + return false; + } + + GroupAnswer answer = null; + if (origAnswer instanceof GroupAnswer) { + answer = (GroupAnswer) origAnswer; + } else { + s_logger.error("Unable to update health checks data to router " + router.getHostName() + " Received answer " + origAnswer.getDetails()); + return false; + } + + if (!answer.getResult()) { + s_logger.error("Unable to update health checks data to router " + router.getHostName() + ", details : " + answer.getDetails()); + } + + return answer.getResult(); + } + + private String getSystemThresholdsHealthChecksData(final DomainRouterVO router) { + return new StringBuilder() + .append("minDiskNeeded=" + RouterHealthChecksFreeDiskSpaceThreshold.valueIn(router.getDataCenterId())) + .append(",maxCpuUsage=" + RouterHealthChecksMaxCpuUsageThreshold.valueIn(router.getDataCenterId())) + .append(",maxMemoryUsage=" + RouterHealthChecksMaxMemoryUsageThreshold.valueIn(router.getDataCenterId()) + ";") + .toString(); + } + + private String 
getRouterVersionHealthChecksData(final DomainRouterVO router) { + if (router.getTemplateVersion() != null && router.getScriptsVersion() != null) { + StringBuilder routerVersion = new StringBuilder() + .append("templateVersion=" + router.getTemplateVersion()) + .append(",scriptsVersion=" + router.getScriptsVersion()); + return routerVersion.toString(); + } + return null; + } + + private void updateWithPortForwardingRules(final DomainRouterJoinVO routerJoinVO, final UserVmJoinVO vm, final StringBuilder portData) { + SearchBuilder sbpf = portForwardingDao.createSearchBuilder(); + sbpf.and("networkId", sbpf.entity().getNetworkId(), SearchCriteria.Op.EQ); + sbpf.and("instanceId", sbpf.entity().getVirtualMachineId(), SearchCriteria.Op.EQ); + SearchCriteria scpf = sbpf.create(); + scpf.setParameters("networkId", routerJoinVO.getNetworkId()); + scpf.setParameters("instanceId", vm.getId()); + List portForwardingRules = portForwardingDao.search(scpf, null); + for (PortForwardingRuleVO portForwardingRule : portForwardingRules) { + portData.append("sourceIp=").append(_ipAddressDao.findById(portForwardingRule.getSourceIpAddressId()).getAddress().toString()) + .append(",sourcePortStart=").append(portForwardingRule.getSourcePortStart()) + .append(",sourcePortEnd=").append(portForwardingRule.getSourcePortEnd()) + .append(",destIp=").append(portForwardingRule.getDestinationIpAddress()) + .append(",destPortStart=").append(portForwardingRule.getDestinationPortStart()) + .append(",destPortEnd=").append(portForwardingRule.getDestinationPortEnd()).append(";"); + } + } + + private String getStickinessPolicies(long loadBalancingRuleId) { + List stickinessPolicyVOs = lbStickinessPolicyDao.listByLoadBalancerId(loadBalancingRuleId, false); + if (stickinessPolicyVOs != null && stickinessPolicyVOs.size() > 0) { + StringBuilder stickiness = new StringBuilder(); + for (LBStickinessPolicyVO stickinessVO : stickinessPolicyVOs) { + stickiness.append(stickinessVO.getMethodName()).append(" "); + } + 
return stickiness.toString().trim(); + } + return "None"; + } + + private void updateWithLbRules(final DomainRouterJoinVO routerJoinVO, final StringBuilder loadBalancingData) { + List loadBalancerVOs = this.getLBRules(routerJoinVO); + for (FirewallRuleVO firewallRuleVO : loadBalancerVOs) { + List vmMapVOs = _loadBalancerVMMapDao.listByLoadBalancerId(firewallRuleVO.getId(), false); + if (vmMapVOs.size() > 0) { + + final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(routerJoinVO.getNetworkId()).getNetworkOfferingId()); + if (offering.getConcurrentConnections() == null) { + loadBalancingData.append("maxconn=").append(_configDao.getValue(Config.NetworkLBHaproxyMaxConn.key())); + } else { + loadBalancingData.append("maxconn=").append(offering.getConcurrentConnections().toString()); + } + + loadBalancingData.append(",sourcePortStart=").append(firewallRuleVO.getSourcePortStart()) + .append(",sourcePortEnd=").append(firewallRuleVO.getSourcePortEnd()); + if (firewallRuleVO instanceof LoadBalancerVO) { + LoadBalancerVO loadBalancerVO = (LoadBalancerVO) firewallRuleVO; + loadBalancingData.append(",sourceIp=").append(_ipAddressDao.findById(loadBalancerVO.getSourceIpAddressId()).getAddress().toString()) + .append(",destPortStart=").append(loadBalancerVO.getDefaultPortStart()) + .append(",destPortEnd=").append(loadBalancerVO.getDefaultPortEnd()) + .append(",algorithm=").append(loadBalancerVO.getAlgorithm()) + .append(",protocol=").append(loadBalancerVO.getLbProtocol()); + } else if (firewallRuleVO instanceof ApplicationLoadBalancerRuleVO) { + ApplicationLoadBalancerRuleVO appLoadBalancerVO = (ApplicationLoadBalancerRuleVO) firewallRuleVO; + loadBalancingData.append(",sourceIp=").append(appLoadBalancerVO.getSourceIp()) + .append(",destPortStart=").append(appLoadBalancerVO.getDefaultPortStart()) + .append(",destPortEnd=").append(appLoadBalancerVO.getDefaultPortEnd()) + .append(",algorithm=").append(appLoadBalancerVO.getAlgorithm()) + 
.append(",protocol=").append(appLoadBalancerVO.getLbProtocol()); + } + loadBalancingData.append(",stickiness=").append(getStickinessPolicies(firewallRuleVO.getId())); + loadBalancingData.append(",keepAliveEnabled=").append(offering.isKeepAliveEnabled()).append(",vmIps="); + for (LoadBalancerVMMapVO vmMapVO : vmMapVOs) { + loadBalancingData.append(vmMapVO.getInstanceIp()).append(" "); + } + loadBalancingData.setCharAt(loadBalancingData.length() - 1, ';'); + } + } + } + + private Map getRouterHealthChecksConfig(final DomainRouterVO router) { + Map data = new HashMap<>(); + List routerJoinVOs = domainRouterJoinDao.searchByIds(router.getId()); + StringBuilder vmsData = new StringBuilder(); + StringBuilder portData = new StringBuilder(); + StringBuilder loadBalancingData = new StringBuilder(); + StringBuilder gateways = new StringBuilder(); + gateways.append("gatewaysIps="); + for (DomainRouterJoinVO routerJoinVO : routerJoinVOs) { + if (StringUtils.isNotBlank(routerJoinVO.getGateway())) { + gateways.append(routerJoinVO.getGateway() + " "); + } + SearchBuilder sbvm = userVmJoinDao.createSearchBuilder(); + sbvm.and("networkId", sbvm.entity().getNetworkId(), SearchCriteria.Op.EQ); + SearchCriteria scvm = sbvm.create(); + scvm.setParameters("networkId", routerJoinVO.getNetworkId()); + List vms = userVmJoinDao.search(scvm, null); + for (UserVmJoinVO vm : vms) { + if (vm.getState() != VirtualMachine.State.Running) { + continue; + } + + vmsData.append("vmName=").append(vm.getName()) + .append(",macAddress=").append(vm.getMacAddress()) + .append(",ip=").append(vm.getIpAddress()).append(";"); + updateWithPortForwardingRules(routerJoinVO, vm, portData); + } + updateWithLbRules(routerJoinVO, loadBalancingData); + } + + String routerVersion = getRouterVersionHealthChecksData(router); + data.put("virtualMachines", vmsData.toString()); + data.put("gateways", gateways.toString()); + data.put("portForwarding", portData.toString()); + data.put("haproxyData", 
loadBalancingData.toString()); + data.put("systemThresholds", getSystemThresholdsHealthChecksData(router)); + if (routerVersion != null) { + data.put("routerVersion", routerVersion); + } + return data; + } + + private List getLBRules(final DomainRouterJoinVO router) { + if (router.getRole() == Role.VIRTUAL_ROUTER) { + SearchBuilder sblb = _loadBalancerDao.createSearchBuilder(); + sblb.and("networkId", sblb.entity().getNetworkId(), SearchCriteria.Op.EQ); + sblb.and("sourceIpAddressId", sblb.entity().getSourceIpAddressId(), SearchCriteria.Op.NNULL); + SearchCriteria sclb = sblb.create(); + sclb.setParameters("networkId", router.getNetworkId()); + return _loadBalancerDao.search(sclb, null); + } else if (router.getRole() == Role.INTERNAL_LB_VM) { + SearchBuilder sbalb = applicationLoadBalancerRuleDao.createSearchBuilder(); + sbalb.and("networkId", sbalb.entity().getNetworkId(), SearchCriteria.Op.EQ); + sbalb.and("sourceIpAddress", sbalb.entity().getSourceIp(), SearchCriteria.Op.NNULL); + SearchCriteria sclb = sbalb.create(); + sclb.setParameters("networkId", router.getNetworkId()); + return applicationLoadBalancerRuleDao.search(sclb, null); + } + return Collections.emptyList(); + } + protected class CheckRouterAlertsTask extends ManagedContextRunnable { public CheckRouterAlertsTask() { } @@ -1205,12 +1843,11 @@ protected void getRouterAlerts() { final List routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId); s_logger.debug("Found " + routers.size() + " running routers. 
"); - for (final DomainRouterVO router : routers) { final String serviceMonitoringFlag = SetServiceMonitor.valueIn(router.getDataCenterId()); // Skip the routers in VPC network or skip the routers where // Monitor service is not enabled in the corresponding Zone - if (!Boolean.parseBoolean(serviceMonitoringFlag) || router.getVpcId() != null) { + if (!Boolean.parseBoolean(serviceMonitoringFlag)) { continue; } String controlIP = getRouterControlIP(router); @@ -1253,7 +1890,7 @@ protected void getRouterAlerts() { final String alerts[] = answer.getAlerts(); if (alerts != null) { final String lastAlertTimeStamp = answer.getTimeStamp(); - final SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); + final SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); sdfrmt.setLenient(false); try { sdfrmt.parse(lastAlertTimeStamp); @@ -1667,19 +2304,7 @@ public boolean finalizeCommandsOnStart(final Commands cmds, final VirtualMachine if (reprogramGuestNtwks) { finalizeIpAssocForNetwork(cmds, router, provider, guestNetworkId, null); finalizeNetworkRulesForNetwork(cmds, router, provider, guestNetworkId); - - final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(guestNetworkId).getNetworkOfferingId()); - // service monitoring is currently not added in RVR - if (!offering.isRedundantRouter()) { - final String serviceMonitringSet = _configDao.getValue(Config.EnableServiceMonitoring.key()); - - if (serviceMonitringSet != null && serviceMonitringSet.equalsIgnoreCase("true")) { - finalizeMonitorServiceOnStrat(cmds, profile, router, provider, guestNetworkId, true); - } else { - finalizeMonitorServiceOnStrat(cmds, profile, router, provider, guestNetworkId, false); - } - } - + finalizeMonitorService(cmds, profile, router, provider, guestNetworkId, true); } finalizeUserDataAndDhcpOnStart(cmds, router, provider, guestNetworkId); @@ -1692,31 +2317,38 @@ public boolean finalizeCommandsOnStart(final Commands cmds, final 
VirtualMachine return true; } - private void finalizeMonitorServiceOnStrat(final Commands cmds, final VirtualMachineProfile profile, final DomainRouterVO router, final Provider provider, - final long networkId, final Boolean add) { + protected void finalizeMonitorService(final Commands cmds, final VirtualMachineProfile profile, final DomainRouterVO router, final Provider provider, + final long networkId, boolean onStart) { + final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(networkId).getNetworkOfferingId()); + if (offering.isRedundantRouter()) { + // service monitoring is currently not added in RVR + return; + } + final String serviceMonitoringSet = _configDao.getValue(Config.EnableServiceMonitoring.key()); + final Boolean isMonitoringServicesEnabled = serviceMonitoringSet != null && serviceMonitoringSet.equalsIgnoreCase("true"); final NetworkVO network = _networkDao.findById(networkId); s_logger.debug("Creating monitoring services on " + router + " start..."); // get the list of sevices for this network to monitor final List services = new ArrayList(); - if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dhcp, Provider.VirtualRouter) - || _networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dns, Provider.VirtualRouter)) { + if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dhcp, provider) + || _networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dns, provider)) { final MonitoringServiceVO dhcpService = _monitorServiceDao.getServiceByName(MonitoringService.Service.Dhcp.toString()); if (dhcpService != null) { services.add(dhcpService); } } - if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Lb, Provider.VirtualRouter)) { + if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Lb, provider)) { final MonitoringServiceVO lbService = 
_monitorServiceDao.getServiceByName(MonitoringService.Service.LoadBalancing.toString()); if (lbService != null) { services.add(lbService); } } - final List defaultServices = _monitorServiceDao.listDefaultServices(true); - services.addAll(defaultServices); + + services.addAll(getDefaultServicesToMonitor(network)); final List servicesTO = new ArrayList(); for (final MonitoringServiceVO service : services) { @@ -1734,17 +2366,21 @@ private void finalizeMonitorServiceOnStrat(final Commands cmds, final VirtualMac if (controlNic == null) { throw new CloudRuntimeException("VirtualMachine " + profile.getInstanceName() + " doesn't have a control interface"); } - final SetMonitorServiceCommand command = new SetMonitorServiceCommand(servicesTO); - command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlNic.getIPv4Address()); - command.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, _routerControlHelper.getRouterIpInNetwork(networkId, router.getId())); - command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); - if (!add) { - command.setAccessDetail(NetworkElementCommand.ROUTER_MONITORING_ENABLE, add.toString()); + // As part of aggregate command we don't need to reconfigure if onStart and persist in processed cache. Subsequent updates are not needed. 
+ SetMonitorServiceCommand command = createMonitorServiceCommand(router, servicesTO, !onStart, false); + command.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, _routerControlHelper.getRouterIpInNetwork(networkId, router.getId())); + if (!isMonitoringServicesEnabled) { + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_MONITORING_ENABLED, isMonitoringServicesEnabled.toString()); } + cmds.addCommand("monitor", command); } + protected List getDefaultServicesToMonitor(final NetworkVO network) { + return _monitorServiceDao.listDefaultServices(true); + } + protected NicProfile getControlNic(final VirtualMachineProfile profile) { final DomainRouterVO router = _routerDao.findById(profile.getId()); final DataCenterVO dcVo = _dcDao.findById(router.getDataCenterId()); @@ -2599,7 +3235,22 @@ public String getConfigComponentName() { @Override public ConfigKey[] getConfigKeys() { - return new ConfigKey[] { UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor, RouterAlertsCheckInterval }; + return new ConfigKey[] { + UseExternalDnsServers, + RouterVersionCheckEnabled, + SetServiceMonitor, + RouterAlertsCheckInterval, + RouterHealthChecksEnabled, + RouterHealthChecksBasicInterval, + RouterHealthChecksAdvancedInterval, + RouterHealthChecksConfigRefreshInterval, + RouterHealthChecksResultFetchInterval, + RouterHealthChecksFailuresToRecreateVr, + RouterHealthChecksToExclude, + RouterHealthChecksFreeDiskSpaceThreshold, + RouterHealthChecksMaxCpuUsageThreshold, + RouterHealthChecksMaxMemoryUsageThreshold + }; } @Override diff --git a/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java b/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java index 80b1797e8485..4b6da55b2382 100644 --- a/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java +++ b/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java @@ -18,6 +18,7 @@ 
import java.net.URI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -26,6 +27,9 @@ import javax.inject.Inject; import javax.naming.ConfigurationException; +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; + import com.cloud.agent.api.Answer; import com.cloud.agent.api.Command; import com.cloud.agent.api.Command.OnError; @@ -34,6 +38,7 @@ import com.cloud.agent.api.SetupGuestNetworkCommand; import com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.AggregationControlCommand.Action; +import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.agent.manager.Commands; import com.cloud.dc.DataCenter; import com.cloud.deploy.DeployDestination; @@ -42,7 +47,11 @@ import com.cloud.exception.InsufficientCapacityException; import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.ResourceUnavailableException; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.hypervisor.HypervisorGuru; +import com.cloud.hypervisor.HypervisorGuruManager; import com.cloud.network.IpAddress; +import com.cloud.network.MonitoringService; import com.cloud.network.Network; import com.cloud.network.Network.Provider; import com.cloud.network.Network.Service; @@ -54,6 +63,8 @@ import com.cloud.network.VirtualRouterProvider; import com.cloud.network.addr.PublicIp; import com.cloud.network.dao.IPAddressVO; +import com.cloud.network.dao.MonitoringServiceVO; +import com.cloud.network.dao.NetworkVO; import com.cloud.network.dao.RemoteAccessVpnVO; import com.cloud.network.vpc.NetworkACLItemDao; import com.cloud.network.vpc.NetworkACLItemVO; @@ -72,6 +83,9 @@ import com.cloud.network.vpc.dao.StaticRouteDao; import com.cloud.network.vpc.dao.VpcGatewayDao; import com.cloud.network.vpn.Site2SiteVpnManager; +import com.cloud.service.ServiceOfferingVO; +import com.cloud.template.VirtualMachineTemplate; +import 
com.cloud.user.Account; import com.cloud.user.UserStatisticsVO; import com.cloud.utils.Pair; import com.cloud.utils.db.EntityManager; @@ -87,14 +101,8 @@ import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.VirtualMachineProfile.Param; +import com.cloud.vm.VirtualMachineProfileImpl; import com.cloud.vm.dao.VMInstanceDao; -import com.cloud.agent.api.to.VirtualMachineTO; -import com.cloud.hypervisor.Hypervisor; -import com.cloud.hypervisor.HypervisorGuru; -import com.cloud.hypervisor.HypervisorGuruManager; - -import org.apache.log4j.Logger; -import org.springframework.stereotype.Component; @Component public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplianceManagerImpl implements VpcVirtualNetworkApplianceManager { @@ -151,8 +159,9 @@ public boolean addVpcRouterToGuestNetwork(final VirtualRouter router, final Netw result = false; } // 3) apply networking rules - if (result && params.get(Param.ReProgramGuestNetworks) != null && (Boolean) params.get(Param.ReProgramGuestNetworks) == true) { - sendNetworkRulesToRouter(router.getId(), network.getId()); + if (result) { + boolean reprogramNetwork = params != null && params.get(Param.ReProgramGuestNetworks) != null && (Boolean) params.get(Param.ReProgramGuestNetworks) == true; + sendNetworkRulesToRouter(router.getId(), network.getId(), reprogramNetwork); } } catch (final Exception ex) { s_logger.warn("Failed to add router " + router + " to network " + network + " due to ", ex); @@ -454,19 +463,25 @@ public boolean finalizeCommandsOnStart(final Commands cmds, final VirtualMachine throw new CloudRuntimeException("Cannot find related provider of virtual router provider: " + vrProvider.getType().toString()); } + if (reprogramGuestNtwks && publicNics.size() > 0) { + finalizeMonitorService(cmds, profile, domainRouterVO, provider, publicNics.get(0).second().getId(), true); + } + for (final Pair nicNtwk : guestNics) { final Nic guestNic = nicNtwk.first(); + 
final long guestNetworkId = guestNic.getNetworkId(); final AggregationControlCommand startCmd = new AggregationControlCommand(Action.Start, domainRouterVO.getInstanceName(), controlNic.getIPv4Address(), _routerControlHelper.getRouterIpInNetwork( - guestNic.getNetworkId(), domainRouterVO.getId())); + guestNetworkId, domainRouterVO.getId())); cmds.addCommand(startCmd); if (reprogramGuestNtwks) { - finalizeIpAssocForNetwork(cmds, domainRouterVO, provider, guestNic.getNetworkId(), vlanMacAddress); - finalizeNetworkRulesForNetwork(cmds, domainRouterVO, provider, guestNic.getNetworkId()); + finalizeIpAssocForNetwork(cmds, domainRouterVO, provider, guestNetworkId, vlanMacAddress); + finalizeNetworkRulesForNetwork(cmds, domainRouterVO, provider, guestNetworkId); + finalizeMonitorService(cmds, profile, domainRouterVO, provider, guestNetworkId, true); } - finalizeUserDataAndDhcpOnStart(cmds, domainRouterVO, provider, guestNic.getNetworkId()); + finalizeUserDataAndDhcpOnStart(cmds, domainRouterVO, provider, guestNetworkId); final AggregationControlCommand finishCmd = new AggregationControlCommand(Action.Finish, domainRouterVO.getInstanceName(), controlNic.getIPv4Address(), _routerControlHelper.getRouterIpInNetwork( - guestNic.getNetworkId(), domainRouterVO.getId())); + guestNetworkId, domainRouterVO.getId())); cmds.addCommand(finishCmd); } @@ -476,6 +491,14 @@ public boolean finalizeCommandsOnStart(final Commands cmds, final VirtualMachine return true; } + @Override + protected List getDefaultServicesToMonitor(NetworkVO network) { + if (network.getTrafficType() == TrafficType.Public) { + return Arrays.asList(_monitorServiceDao.getServiceByName(MonitoringService.Service.Ssh.toString())); + } + return super.getDefaultServicesToMonitor(network); + } + @Override protected void finalizeNetworkRulesForNetwork(final Commands cmds, final DomainRouterVO domainRouterVO, final Provider provider, final Long guestNetworkId) { @@ -495,7 +518,7 @@ protected void 
finalizeNetworkRulesForNetwork(final Commands cmds, final DomainR } } - protected boolean sendNetworkRulesToRouter(final long routerId, final long networkId) throws ResourceUnavailableException { + protected boolean sendNetworkRulesToRouter(final long routerId, final long networkId, final boolean reprogramNetwork) throws ResourceUnavailableException { final DomainRouterVO router = _routerDao.findById(routerId); final Commands cmds = new Commands(OnError.Continue); @@ -508,10 +531,26 @@ protected boolean sendNetworkRulesToRouter(final long routerId, final long netwo throw new CloudRuntimeException("Cannot find related provider of virtual router provider: " + vrProvider.getType().toString()); } - finalizeNetworkRulesForNetwork(cmds, router, provider, networkId); + if (reprogramNetwork) { + finalizeNetworkRulesForNetwork(cmds, router, provider, networkId); + } + + finalizeMonitorService(cmds, getVirtualMachineProfile(router), router, provider, networkId, false); + return _nwHelper.sendCommandsToRouter(router, cmds); } + private VirtualMachineProfile getVirtualMachineProfile(DomainRouterVO router) { + final ServiceOfferingVO offering = _serviceOfferingDao.findById(router.getId(), router.getServiceOfferingId()); + final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, router.getTemplateId()); + final Account owner = _entityMgr.findById(Account.class, router.getAccountId()); + final VirtualMachineProfileImpl profile = new VirtualMachineProfileImpl(router, template, offering, owner, null); + for (final NicProfile nic : _networkMgr.getNicProfiles(router)) { + profile.addNic(nic); + } + return profile; + } + /** * @param router * @param add diff --git a/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java b/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java index b1b1059c1236..839280c31965 100644 --- a/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java +++ 
b/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java @@ -46,6 +46,7 @@ import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd; import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd; import org.apache.cloudstack.api.command.user.vpc.ListVPCOfferingsCmd; +import org.apache.cloudstack.api.command.user.vpc.RestartVPCCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; @@ -1697,16 +1698,21 @@ public boolean cleanupVpcResources(final long vpcId, final Account caller, final return success; } + @Override @ActionEvent(eventType = EventTypes.EVENT_VPC_RESTART, eventDescription = "restarting vpc") - public boolean restartVpc(final long vpcId, final boolean cleanUp, final boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, + public boolean restartVpc(final RestartVPCCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { - - final Account callerAccount = CallContext.current().getCallingAccount(); + final long vpcId = cmd.getId(); + final boolean cleanUp = cmd.getCleanup(); + final boolean makeRedundant = cmd.getMakeredundant(); final User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); - final ReservationContext context = new ReservationContextImpl(null, null, callerUser, callerAccount); + return restartVpc(vpcId, cleanUp, makeRedundant, callerUser); + } - // Verify input parameters + @Override + @ActionEvent(eventType = EventTypes.EVENT_VPC_RESTART, eventDescription = "restarting vpc") + public boolean restartVpc(Long vpcId, boolean cleanUp, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { Vpc vpc = getActiveVpc(vpcId); if (vpc == null) { final 
InvalidParameterValueException ex = new InvalidParameterValueException("Unable to find Enabled VPC by id specified"); @@ -1714,6 +1720,8 @@ public boolean restartVpc(final long vpcId, final boolean cleanUp, final boolean throw ex; } + Account callerAccount = _accountMgr.getActiveAccountById(user.getAccountId()); + final ReservationContext context = new ReservationContextImpl(null, null, user, callerAccount); _accountMgr.checkAccess(callerAccount, null, false, vpc); s_logger.debug("Restarting VPC " + vpc); diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index 147c527b2739..01fe2eb88336 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -171,6 +171,7 @@ import org.apache.cloudstack.api.command.admin.router.ConfigureVirtualRouterElementCmd; import org.apache.cloudstack.api.command.admin.router.CreateVirtualRouterElementCmd; import org.apache.cloudstack.api.command.admin.router.DestroyRouterCmd; +import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd; import org.apache.cloudstack.api.command.admin.router.ListOvsElementsCmd; import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd; import org.apache.cloudstack.api.command.admin.router.ListVirtualRouterElementsCmd; @@ -3115,6 +3116,7 @@ public List> getCommands() { cmdList.add(ListMgmtsCmd.class); cmdList.add(GetUploadParamsForIsoCmd.class); cmdList.add(ListTemplateOVFProperties.class); + cmdList.add(GetRouterHealthCheckResultsCmd.class); // Out-of-band management APIs for admins cmdList.add(EnableOutOfBandManagementForHostCmd.class); diff --git a/server/src/test/java/com/cloud/keystore/KeystoreTest.java b/server/src/test/java/com/cloud/keystore/KeystoreTest.java index 1981a7315f4b..24cc3a74d418 100644 --- a/server/src/test/java/com/cloud/keystore/KeystoreTest.java +++ 
b/server/src/test/java/com/cloud/keystore/KeystoreTest.java @@ -16,17 +16,16 @@ // under the License. package com.cloud.keystore; -import junit.framework.TestCase; - +import org.apache.cloudstack.api.response.AlertResponse; +import org.apache.cloudstack.api.response.UserVmResponse; import org.apache.log4j.Logger; import org.junit.After; import org.junit.Before; -import org.apache.cloudstack.api.response.AlertResponse; -import org.apache.cloudstack.api.response.UserVmResponse; - import com.cloud.api.ApiSerializerHelper; +import junit.framework.TestCase; + public class KeystoreTest extends TestCase { private final static Logger s_logger = Logger.getLogger(KeystoreTest.class); diff --git a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java index 09fd997f6184..7a6cc8bbeee0 100644 --- a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java +++ b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java @@ -213,11 +213,16 @@ public boolean deleteNetwork(long networkId, boolean forced) { return false; } + @Override + public boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { + return false; + } + /* (non-Javadoc) * @see com.cloud.network.NetworkService#restartNetwork(com.cloud.api.commands.RestartNetworkCmd, boolean) */ @Override - public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, + public boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { // TODO Auto-generated method stub return false; diff --git a/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java 
b/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java index a85d039cd13f..d1b951a92015 100644 --- a/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java +++ b/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java @@ -248,6 +248,11 @@ public List upgradeRouterTemplate(final UpgradeRouterTemplateCmd cmd) { return null; //To change body of implemented methods use File | Settings | File Templates. } + @Override + public boolean performRouterHealthChecks(long routerId) { + return false; + } + @Override public boolean prepareAggregatedExecution(final Network network, final List routers) throws AgentUnavailableException { return true; //To change body of implemented methods use File | Settings | File Templates. diff --git a/systemvm/debian/etc/logrotate.d/monitor b/systemvm/debian/etc/logrotate.d/monitor new file mode 100644 index 000000000000..769f8d5e7e14 --- /dev/null +++ b/systemvm/debian/etc/logrotate.d/monitor @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+/var/log/monitor.log { + rotate 5 + maxsize 10M + missingok + notifempty + compress + copytruncate +} diff --git a/systemvm/debian/etc/logrotate.d/routerServiceMonitor b/systemvm/debian/etc/logrotate.d/routerServiceMonitor new file mode 100644 index 000000000000..7202441f66ac --- /dev/null +++ b/systemvm/debian/etc/logrotate.d/routerServiceMonitor @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+/var/log/routerServiceMonitor.log { + rotate 5 + maxsize 10M + missingok + notifempty + compress + copytruncate +} diff --git a/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py b/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py index 6b194238b1ac..5a0ff5b114c4 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsMonitor.py @@ -17,27 +17,67 @@ import logging from cs.CsDatabag import CsDataBag from CsFile import CsFile +import json MON_CONFIG = "/etc/monitor.conf" +HC_CONFIG = "/root/health_checks_data.json" class CsMonitor(CsDataBag): - """ Manage dhcp entries """ + """ Manage Monitor script schedule and health checks for router """ - def process(self): - if "config" not in self.dbag: - return - procs = [x.strip() for x in self.dbag['config'].split(',')] - file = CsFile(MON_CONFIG) - for proc in procs: - bits = [x for x in proc.split(':')] - if len(bits) < 5: - continue - for i in range(0, 4): - file.add(bits[i], -1) - file.commit() + def get_basic_check_interval(self): + return self.dbag["health_checks_basic_run_interval"] if "health_checks_basic_run_interval" in self.dbag else 3 + + def get_advanced_check_interval(self): + return self.dbag["health_checks_advanced_run_interval"] if "health_checks_advanced_run_interval" in self.dbag else 0 + + def setupMonitorConfigFile(self): + if "config" in self.dbag: + procs = [x.strip() for x in self.dbag['config'].split(',')] + file = CsFile(MON_CONFIG) + for proc in procs: + bits = [x for x in proc.split(':')] + if len(bits) < 5: + continue + for i in range(0, 4): + file.add(bits[i], -1) + file.commit() + + def setupHealthCheckCronJobs(self): + cron_rep_basic = self.get_basic_check_interval() + cron_rep_advanced = self.get_advanced_check_interval() cron = CsFile("/etc/cron.d/process") + cron.deleteLine("root /usr/bin/python /root/monitorServices.py") cron.add("SHELL=/bin/bash", 0) cron.add("PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin", 1) - cron.add("*/3 * * * * 
root /usr/bin/python /root/monitorServices.py", -1) + if cron_rep_basic > 0: + cron.add("*/" + str(cron_rep_basic) + " * * * * root /usr/bin/python /root/monitorServices.py basic", -1) + if cron_rep_advanced > 0: + cron.add("*/" + str(cron_rep_advanced) + " * * * * root /usr/bin/python /root/monitorServices.py advanced", -1) cron.commit() + + def setupHealthChecksConfigFile(self): + hc_data = {} + hc_data["health_checks_basic_run_interval"] = self.get_basic_check_interval() + hc_data["health_checks_advanced_run_interval"] = self.get_advanced_check_interval() + hc_data["health_checks_enabled"] = self.dbag["health_checks_enabled"] if "health_checks_enabled" in self.dbag else False + + if "excluded_health_checks" in self.dbag: + excluded_checks = self.dbag["excluded_health_checks"] + hc_data["excluded_health_checks"] = [ch.strip() for ch in excluded_checks.split(",")] if len(excluded_checks) > 0 else [] + else: + hc_data["excluded_health_checks"] = [] + + if "health_checks_config" in self.dbag: + hc_data["health_checks_config"] = self.dbag["health_checks_config"] + else: + hc_data["health_checks_config"] = {} + + with open(HC_CONFIG, 'w') as f: + json.dump(hc_data, f, ensure_ascii=False, indent=4) + + def process(self): + self.setupMonitorConfigFile() + self.setupHealthChecksConfigFile() + self.setupHealthCheckCronJobs() diff --git a/systemvm/debian/opt/cloud/bin/cs_monitorservice.py b/systemvm/debian/opt/cloud/bin/cs_monitorservice.py index 75a7c95d6fac..55c89dfb59b3 100755 --- a/systemvm/debian/opt/cloud/bin/cs_monitorservice.py +++ b/systemvm/debian/opt/cloud/bin/cs_monitorservice.py @@ -22,4 +22,15 @@ def merge(dbag, data): if "config" in data: dbag['config'] = data["config"] + if "health_checks_enabled" in data: + dbag["health_checks_enabled"] = data["health_checks_enabled"] + if "health_checks_basic_run_interval" in data: + dbag["health_checks_basic_run_interval"] = data["health_checks_basic_run_interval"] + if "health_checks_advanced_run_interval" in data: + 
dbag["health_checks_advanced_run_interval"] = data["health_checks_advanced_run_interval"] + if "excluded_health_checks" in data: + dbag["excluded_health_checks"] = data["excluded_health_checks"] + if "health_checks_config" in data: + dbag["health_checks_config"] = data["health_checks_config"] + return dbag diff --git a/systemvm/debian/opt/cloud/bin/getRouterMonitorResults.sh b/systemvm/debian/opt/cloud/bin/getRouterMonitorResults.sh new file mode 100755 index 000000000000..bdc709d265f4 --- /dev/null +++ b/systemvm/debian/opt/cloud/bin/getRouterMonitorResults.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# getRouterMonitorResults.sh --- Send the monitor results to Management Server + +if [ "$1" == "true" ] +then + python /root/monitorServices.py > /dev/null +fi + +printf "FAILING CHECKS:\n" + +if [ -f /root/basic_failing_health_checks ] +then + echo `cat /root/basic_failing_health_checks` +fi + +if [ -f /root/advanced_failing_health_checks ] +then + echo `cat /root/advanced_failing_health_checks` +fi + +printf "MONITOR RESULTS:\n" + +echo "{\"basic\":" +if [ -f /root/basic_monitor_results.json ] +then + echo `cat /root/basic_monitor_results.json` +else + echo "{}" +fi +echo ",\"advanced\":" +if [ -f /root/advanced_monitor_results.json ] +then + echo `cat /root/advanced_monitor_results.json` +else + echo "{}" +fi + +echo "}" diff --git a/systemvm/debian/opt/cloud/bin/merge.py b/systemvm/debian/opt/cloud/bin/merge.py index 54d86c5e8bb2..b988b7a3e37e 100755 --- a/systemvm/debian/opt/cloud/bin/merge.py +++ b/systemvm/debian/opt/cloud/bin/merge.py @@ -301,6 +301,7 @@ def load(self, data): if self.keep: self.__moveFile(filename, self.configCache + "/processed") else: + logging.debug("Processed file deleted: %s and not kept in /processed", filename) os.remove(filename) updateDataBag(self) diff --git a/systemvm/debian/opt/cloud/bin/update_config.py b/systemvm/debian/opt/cloud/bin/update_config.py index 77008afb794b..c9121eb634f9 100755 --- a/systemvm/debian/opt/cloud/bin/update_config.py +++ b/systemvm/debian/opt/cloud/bin/update_config.py @@ -29,7 +29,8 @@ logging.basicConfig(filename='/var/log/cloud.log', level=logging.INFO, format='%(asctime)s %(filename)s %(funcName)s:%(lineno)d %(message)s') # first commandline argument should be the file to process -if (len(sys.argv) != 2): +argc = len(sys.argv) +if argc != 2 and argc != 3: logging.error("Invalid usage, args passed: %s" % sys.argv) sys.exit(1) @@ -49,6 +50,9 @@ def finish_config(): def process_file(): logging.info("Processing JSON file %s" % sys.argv[1]) qf = QueueFile() + if len(sys.argv) > 2 and 
sys.argv[2].lower() == "false": + qf.keep = False + qf.setFile(sys.argv[1]) qf.load(None) # These can be safely deferred, dramatically speeding up loading times diff --git a/systemvm/debian/root/health_checks/__init__.py b/systemvm/debian/root/health_checks/__init__.py new file mode 100644 index 000000000000..3dcbe822698f --- /dev/null +++ b/systemvm/debian/root/health_checks/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Needed to expose utility as package outside for monitorServices.py. +# This directory should only contain executables for health checks. diff --git a/systemvm/debian/root/health_checks/cpu_usage_check.py b/systemvm/debian/root/health_checks/cpu_usage_check.py new file mode 100644 index 000000000000..5e6a2fe5e9e8 --- /dev/null +++ b/systemvm/debian/root/health_checks/cpu_usage_check.py @@ -0,0 +1,56 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from os import sys, path, statvfs +from subprocess import * +from utility import getHealthChecksData + + +def main(): + entries = getHealthChecksData("systemThresholds") + data = {} + if entries is not None and len(entries) == 1: + data = entries[0] + + if "maxCpuUsage" not in data: + print "Missing maxCpuUsage in health_checks_data systemThresholds, skipping" + exit(0) + + maxCpuUsage = float(data["maxCpuUsage"]) + cmd = "top -b -n2 -p 1 | fgrep \"Cpu(s)\" | tail -1 | " \ + "awk -F 'id,' " \ + "'{ split($1, vs, \",\"); idle=vs[length(vs)]; " \ + "sub(\"%\", \"\", idle); printf \"%.2f\", 100 - idle }'" + pout = Popen(cmd, shell=True, stdout=PIPE) + if pout.wait() == 0: + currentUsage = float(pout.communicate()[0].strip()) + if currentUsage > maxCpuUsage: + print "CPU Usage " + str(currentUsage) + \ + "% has crossed threshold of " + str(maxCpuUsage) + "%" + exit(1) + print "CPU Usage within limits with current at " \ + + str(currentUsage) + "%" + exit(0) + else: + print "Failed to retrieve cpu usage using " + cmd + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "basic": + main() diff --git a/systemvm/debian/root/health_checks/dhcp_check.py b/systemvm/debian/root/health_checks/dhcp_check.py new file mode 100755 index 000000000000..be7a8407e395 --- /dev/null +++ b/systemvm/debian/root/health_checks/dhcp_check.py @@ -0,0 +1,69 @@ +#!/usr/bin/python +# Licensed to 
the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from os import sys, path +from utility import getHealthChecksData + + +def main(): + vMs = getHealthChecksData("virtualMachines") + + if vMs is None or len(vMs) == 0: + print "No VMs running data available, skipping" + exit(0) + + with open('/etc/dhcphosts.txt', 'r') as hostsFile: + allHosts = hostsFile.readlines() + hostsFile.close() + + failedCheck = False + failureMessage = "Missing elements in dhcphosts.txt - \n" + for vM in vMs: + entry = vM["macAddress"] + " " + vM["ip"] + " " + vM["vmName"] + foundEntry = False + for host in allHosts: + host = host.strip().split(',') + if len(host) < 4: + continue + + if host[0].strip() == vM["macAddress"] and host[1].strip() == vM["ip"]\ + and host[2].strip() == vM["vmName"]: + foundEntry = True + break + + nonDefaultSet = "set:" + vM["ip"].replace(".", "_") + if host[0].strip() == vM["macAddress"] and host[1].strip() == nonDefaultSet \ + and host[2].strip() == vM["ip"] and host[3].strip() == vM["vmName"]: + foundEntry = True + break + + if not foundEntry: + failedCheck = True + failureMessage = failureMessage + entry + ", " + + if failedCheck: + print failureMessage[:-2] + exit(1) + else: + print "All " + str(len(vMs)) + " VMs are present in 
dhcphosts.txt" + exit(0) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "advanced": + main() diff --git a/systemvm/debian/root/health_checks/disk_space_check.py b/systemvm/debian/root/health_checks/disk_space_check.py new file mode 100644 index 000000000000..af8cb3dd07cf --- /dev/null +++ b/systemvm/debian/root/health_checks/disk_space_check.py @@ -0,0 +1,47 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from os import sys, path, statvfs +from utility import getHealthChecksData + + +def main(): + entries = getHealthChecksData("systemThresholds") + data = {} + if entries is not None and len(entries) == 1: + data = entries[0] + + if "minDiskNeeded" not in data: + print "Missing minDiskNeeded in health_checks_data systemThresholds, skipping" + exit(0) + + minDiskNeeded = float(data["minDiskNeeded"]) * 1024 + s = statvfs('/') + freeSpace = (s.f_bavail * s.f_frsize) / 1024 + + if (freeSpace < minDiskNeeded): + print "Insufficient free space is " + str(freeSpace/1024) + " MB" + exit(1) + else: + print "Sufficient free space is " + str(freeSpace/1024) + " MB" + exit(0) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "basic": + main() diff --git a/systemvm/debian/root/health_checks/dns_check.py b/systemvm/debian/root/health_checks/dns_check.py new file mode 100644 index 000000000000..c17788812330 --- /dev/null +++ b/systemvm/debian/root/health_checks/dns_check.py @@ -0,0 +1,59 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from os import sys, path +from utility import getHealthChecksData + + +def main(): + vMs = getHealthChecksData("virtualMachines") + + if vMs is None or len(vMs) == 0: + print "No VMs running data available, skipping" + exit(0) + + with open('/etc/hosts', 'r') as hostsFile: + allHosts = hostsFile.readlines() + hostsFile.close() + + failedCheck = False + failureMessage = "Missing entries for VMs in /etc/hosts -\n" + for vM in vMs: + foundEntry = False + for host in allHosts: + components = host.split('\t') + if len(components) == 2 and components[0].strip() == vM["ip"] \ + and components[1].strip() == vM["vmName"]: + foundEntry = True + break + + if not foundEntry: + failedCheck = True + failureMessage = failureMessage + vM["ip"] + " " + vM["vmName"] + ", " + + if failedCheck: + print failureMessage[:-2] + exit(1) + else: + print "All " + str(len(vMs)) + " VMs are present in /etc/hosts" + exit(0) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "advanced": + main() diff --git a/systemvm/debian/root/health_checks/gateways_check.py b/systemvm/debian/root/health_checks/gateways_check.py new file mode 100644 index 000000000000..29ce884ca997 --- /dev/null +++ b/systemvm/debian/root/health_checks/gateways_check.py @@ -0,0 +1,57 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from os import sys, path +from subprocess import * +from utility import getHealthChecksData + + +def main(): + gws = getHealthChecksData("gateways") + if gws is None or len(gws) == 0: # 'or', not 'and': 'and' evaluates len(None) when gws is None and lets an empty list reach gws[0] below + print "No gateways data available, skipping" + exit(0) + + unreachableGateWays = [] + gwsList = gws[0]["gatewaysIps"].strip().split(' ') + for gw in gwsList: + if len(gw) == 0: + continue + reachableGw = False + for i in range(5): # up to 5 ping attempts; the first success marks the gateway reachable + pingCmd = "ping " + gw + " -c 5 -w 10" + pout = Popen(pingCmd, shell=True, stdout=PIPE) + if pout.wait() == 0: + reachableGw = True + break + + if not reachableGw: + unreachableGateWays.append(gw) + + if len(unreachableGateWays) == 0: + print "All " + str(len(gws)) + " gateways are reachable via ping" + exit(0) + else: + print "Unreachable gateways found-" + print unreachableGateWays + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "basic": + main() diff --git a/systemvm/debian/root/health_checks/haproxy_check.py b/systemvm/debian/root/health_checks/haproxy_check.py new file mode 100644 index 000000000000..56e0ce7d0b0d --- /dev/null +++ b/systemvm/debian/root/health_checks/haproxy_check.py @@ -0,0 +1,134 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from os import sys, path +from utility import getHealthChecksData, formatPort + + +def checkMaxconn(haproxyData, haCfgSections): + if "maxconn" in haproxyData and "maxconn" in haCfgSections["global"]: + if haproxyData["maxconn"] != haCfgSections["global"]["maxconn"][0].strip(): + print "global maxconn mismatch occured" + return False + + return True + + +def checkLoadBalance(haproxyData, haCfgSections): + correct = True + for lbSec in haproxyData: + srcServer = lbSec["sourceIp"].replace('.', '_') + "-" + \ + formatPort(lbSec["sourcePortStart"], + lbSec["sourcePortEnd"]) + secName = "listen " + srcServer + + if secName not in haCfgSections: + print "Missing section for load balancing " + secName + "\n" + correct = False + else: + cfgSection = haCfgSections[secName] + if "server" in cfgSection: + if lbSec["algorithm"] != cfgSection["balance"][0]: + print "Incorrect balance method for " + secName + \ + "Expected : " + lbSec["algorithm"] + \ + " but found " + cfgSection["balance"][0] + "\n" + correct = False + + bindStr = lbSec["sourceIp"] + ":" + formatPort(lbSec["sourcePortStart"], lbSec["sourcePortEnd"]) + if cfgSection["bind"][0] != bindStr: + print "Incorrect bind string found. Expected " + bindStr + " but found " + cfgSection["bind"][0] + "." 
+ correct = False + + if (lbSec["sourcePortStart"] == "80" and lbSec["sourcePortEnd"] == "80" and lbSec["keepAliveEnabled"] == "false") \ + or (lbSec["stickiness"].find("AppCookie") != -1 or lbSec["stickiness"].find("LbCookie") != -1): + if not ("mode" in cfgSection and cfgSection["mode"][0] == "http"): + print "Expected HTTP mode but not found" + correct = False + + expectedServerIps = lbSec["vmIps"].split(" ") + for expectedServerIp in expectedServerIps: + pattern = expectedServerIp + ":" + \ + formatPort(lbSec["destPortStart"], + lbSec["destPortEnd"]) + foundPattern = False + for server in cfgSection["server"]: + s = server.split() + if s[0].strip().find(srcServer + "_") == 0 and s[1].strip() == pattern: + foundPattern = True + break + + if not foundPattern: + correct = False + print "Missing load balancing for " + pattern + ". " + + return correct + + +def main(): + ''' + Checks for max con and each load balancing rule - source ip, ports and destination + ips and ports. Also checks for http mode. Does not check for stickiness policies. 
+ ''' + haproxyData = getHealthChecksData("haproxyData") + if haproxyData is None or len(haproxyData) == 0: + print "No data provided to check, skipping" + exit(0) + + with open("/etc/haproxy/haproxy.cfg", 'r') as haCfgFile: + haCfgLines = haCfgFile.readlines() + haCfgFile.close() + + if len(haCfgLines) == 0: + print "Unable to read config file /etc/haproxy/haproxy.cfg" + exit(1) + + haCfgSections = {} + currSection = None + currSectionDict = {} + for line in haCfgLines + ["\n"]: # trailing sentinel blank line so the final section is stored even when the file does not end with an empty line + line = line.strip() + if len(line) == 0: # a blank line closes the section collected so far + if currSection is not None and len(currSectionDict) > 0: + haCfgSections[currSection] = currSectionDict + + currSection = None + currSectionDict = {} + continue + + if currSection is None: # first non-blank line after a gap is the section name + currSection = line + else: + lineSec = line.split(' ', 1) + if lineSec[0] not in currSectionDict: + currSectionDict[lineSec[0]] = [] + + currSectionDict[lineSec[0]].append(lineSec[1] if len(lineSec) > 1 else '') + + checkMaxConn = checkMaxconn(haproxyData[0], haCfgSections) + checkLbRules = checkLoadBalance(haproxyData, haCfgSections) + + if checkMaxConn and checkLbRules: + print "All checks pass" + exit(0) + else: + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "advanced": + main() diff --git a/systemvm/debian/root/health_checks/iptables_check.py b/systemvm/debian/root/health_checks/iptables_check.py new file mode 100644 index 000000000000..2f3dc5026723 --- /dev/null +++ b/systemvm/debian/root/health_checks/iptables_check.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from os import sys, path +from subprocess import * +from utility import getHealthChecksData, formatPort + + +def main(): + portForwards = getHealthChecksData("portForwarding") + if portForwards is None or len(portForwards) == 0: + print "No portforwarding rules provided to check, skipping" + exit(0) + + failedCheck = False + failureMessage = "Missing port forwarding rules in Iptables-\n " + for portForward in portForwards: + entriesExpected = [] + destIp = portForward["destIp"] + srcIpText = "-d " + portForward["sourceIp"] + srcPortText = "--dport " + formatPort(portForward["sourcePortStart"], portForward["sourcePortEnd"], ":") + dstText = destIp + ":" + formatPort(portForward["destPortStart"], portForward["destPortEnd"], "-") + for algo in [["PREROUTING", "--to-destination"], + ["OUTPUT", "--to-destination"], + ["POSTROUTING", "--to-source"]]: + entriesExpected.append([algo[0], srcIpText, srcPortText, algo[1] + " " + dstText]) + + fetchIpTableEntriesCmd = "iptables-save | grep " + destIp + pout = Popen(fetchIpTableEntriesCmd, shell=True, stdout=PIPE) + if pout.wait() != 0: + failedCheck = True + failureMessage = failureMessage + "Unable to execute iptables-save command " \ + "for fetching rules by " + fetchIpTableEntriesCmd + "\n" + continue + + ipTablesMatchingEntries = pout.communicate()[0].strip().split('\n') + for pfEntryListExpected in entriesExpected: + foundPfEntryList = False + for ipTableEntry in ipTablesMatchingEntries: + # Check if all expected parts of pfEntryList + # is present in this ipTableEntry + foundAll = True + for expectedEntry in 
pfEntryListExpected: + if ipTableEntry.find(expectedEntry) == -1: + foundAll = False + break + + if foundAll: + foundPfEntryList = True + break + + if not foundPfEntryList: + failedCheck = True + failureMessage = failureMessage + str(pfEntryListExpected) + "\n" + + if failedCheck: + print failureMessage + exit(1) + else: + print "Found all entries (count " + str(len(portForwards)) + ") in iptables" + exit(0) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "advanced": + main() diff --git a/systemvm/debian/root/health_checks/memory_usage_check.py b/systemvm/debian/root/health_checks/memory_usage_check.py new file mode 100644 index 000000000000..97ca0c5030e8 --- /dev/null +++ b/systemvm/debian/root/health_checks/memory_usage_check.py @@ -0,0 +1,55 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from os import sys, path, statvfs +from subprocess import * +from utility import getHealthChecksData + + +def main(): + entries = getHealthChecksData("systemThresholds") + data = {} + if entries is not None and len(entries) == 1: + data = entries[0] + + if "maxMemoryUsage" not in data: + print "Missing maxMemoryUsage in health_checks_data " + \ + "systemThresholds, skipping" + exit(0) + + maxMemoryUsage = float(data["maxMemoryUsage"]) + cmd = "free | awk 'FNR == 2 { print $3 * 100 / $2 }'" + pout = Popen(cmd, shell=True, stdout=PIPE) + + if pout.wait() == 0: + currentUsage = float(pout.communicate()[0].strip()) + if currentUsage > maxMemoryUsage: + print "Memory Usage " + str(currentUsage) + \ + "% has crossed threshold of " + str(maxMemoryUsage) + "%" + exit(1) + print "Memory Usage within limits with current at " + \ + str(currentUsage) + "%" + exit(0) + else: + print "Failed to retrieve memory usage using " + cmd + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "basic": + main() diff --git a/systemvm/debian/root/health_checks/router_version_check.py b/systemvm/debian/root/health_checks/router_version_check.py new file mode 100644 index 000000000000..2173e09c81f0 --- /dev/null +++ b/systemvm/debian/root/health_checks/router_version_check.py @@ -0,0 +1,83 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from os import sys, path, statvfs +from utility import getHealthChecksData + + +def getFirstLine(file=None): + if file is not None and path.isfile(file): + ret = None + with open(file, 'r') as oFile: + lines = oFile.readlines() + if len(lines) > 0: + ret = lines[0].strip() + oFile.close() + + return ret + else: + return None + + +def main(): + entries = getHealthChecksData("routerVersion") + data = {} + if entries is not None and len(entries) == 1: + data = entries[0] + + if len(data) == 0: + print "Missing routerVersion in health_checks_data, skipping" + exit(0) + + templateVersionMatches = True + scriptVersionMatches = True + + if "templateVersion" in data: + expected = data["templateVersion"].strip() + releaseFile = "/etc/cloudstack-release" + found = getFirstLine(releaseFile) + if found is None: + print "Release version not yet setup at " + releaseFile +\ + ", skipping." + elif expected != found: + print "Template Version mismatch. Expected: " + \ + expected + ", found: " + found + templateVersionMatches = False + + if "scriptsVersion" in data: + expected = data["scriptsVersion"].strip() + sigFile = "/var/cache/cloud/cloud-scripts-signature" + found = getFirstLine(sigFile) + if found is None: + print "Scripts signature is not yet setup at " + sigFile +\ + ", skipping" + elif expected != found: # elif: with a plain 'if', found being None would reach the string concatenation below and raise TypeError; mirrors the templateVersion branch + print "Scripts Version mismatch. 
Expected: " + \ + expected + ", found: " + found + scriptVersionMatches = False + + if templateVersionMatches and scriptVersionMatches: + print "Template and scripts version match successful" + exit(0) + else: + exit(1) + + +if __name__ == "__main__": + if len(sys.argv) == 2 and sys.argv[1] == "basic": + main() diff --git a/systemvm/debian/root/health_checks/utility/__init__.py b/systemvm/debian/root/health_checks/utility/__init__.py new file mode 100644 index 000000000000..22ac3ff90137 --- /dev/null +++ b/systemvm/debian/root/health_checks/utility/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from sharedFunctions import getHealthChecksData, formatPort diff --git a/systemvm/debian/root/health_checks/utility/sharedFunctions.py b/systemvm/debian/root/health_checks/utility/sharedFunctions.py new file mode 100644 index 000000000000..20ef640b64a2 --- /dev/null +++ b/systemvm/debian/root/health_checks/utility/sharedFunctions.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json + + +def getHealthChecksData(additionalDataKey=None): + with open('/root/health_checks_data.json', 'r') as hc_data_file: + hc_data = json.load(hc_data_file) + + # If no specific key is requested return all the data as JSON + if additionalDataKey is None: + return hc_data + + if additionalDataKey not in hc_data["health_checks_config"]: + return None + + data = hc_data["health_checks_config"][additionalDataKey].strip().split(";") + addData = [] + for line in data: + line = line.strip() + if len(line) == 0: + continue + entries = line.split(',') + d = {} + for entry in entries: + entry = entry.strip() + if len(entry) == 0: + continue + keyVal = entry.split("=") + if len(keyVal) == 2: + d[keyVal[0].strip()] = keyVal[1].strip() + if len(d) > 0: + addData.append(d) + + return addData + + +def formatPort(portStart, portEnd, delim="-"): + return portStart if portStart == portEnd else portStart + delim + portEnd diff --git a/systemvm/debian/root/monitorServices.py b/systemvm/debian/root/monitorServices.py index 75d10043816d..909e419c1801 100755 --- a/systemvm/debian/root/monitorServices.py +++ b/systemvm/debian/root/monitorServices.py @@ -16,16 +16,15 @@ # specific language governing permissions and limitations # under the License. 
- - - - from ConfigParser import SafeConfigParser from subprocess import * -from os import path +from datetime import datetime import time import os import logging +import json +from os import sys, path +from health_checks.utility import getHealthChecksData class StatusCodes: SUCCESS = 0 @@ -42,15 +41,15 @@ class Log: NOTIF = 'NOTIF' class Config: - MONIT_AFTER_MINS = 30 SLEEP_SEC = 1 RETRY_ITERATIONS = 10 RETRY_FOR_RESTART = 5 MONITOR_LOG = '/var/log/monitor.log' - UNMONIT_PS_FILE = '/etc/unmonit_psList.txt' + HEALTH_CHECKS_DIR = 'health_checks' + MONITOR_RESULT_FILE_SUFFIX = 'monitor_results.json' + FAILING_CHECKS_FILE = 'failing_health_checks' - -def getConfig( config_file_path = "/etc/monitor.conf" ): +def getServicesConfig( config_file_path = "/etc/monitor.conf" ): """ Reads the process configuration from the config file. Config file contains the processes to be monitored. @@ -66,7 +65,7 @@ def getConfig( config_file_path = "/etc/monitor.conf" ): for name, value in parser.items(section): process_dict[section][name] = value -# printd (" %s = %r" % (name, value)) + printd (" %s = %r" % (name, value)) return process_dict @@ -77,12 +76,12 @@ def printd (msg): #for debug #print msg - return 0 - f= open(Config.MONITOR_LOG,'r+') + f= open(Config.MONITOR_LOG, 'w' if not path.isfile(Config.MONITOR_LOG) else 'r+') f.seek(0, 2) f.write(str(msg)+"\n") f.close() + print str(msg) def raisealert(severity, msg, process_name=None): """ Writes the alert message""" @@ -97,6 +96,7 @@ def raisealert(severity, msg, process_name=None): logging.info(log) msg = 'logger -t monit '+ log pout = Popen(msg, shell=True, stdout=PIPE) + print "[Alert] " + msg def isPidMatchPidFile(pidfile, pids): @@ -126,7 +126,7 @@ def isPidMatchPidFile(pidfile, pids): fd.close() return StatusCodes.FAILED - printd("file content "+str(inp)) + printd("file content of pidfile " + pidfile + " = " + str(inp).strip()) printd(pids) tocheck_pid = inp.strip() for item in pids: @@ -152,7 +152,7 @@ def 
checkProcessRunningStatus(process_name, pidFile): #check there is only one pid or not if exitStatus == 0: - pids = temp_out.split(' ') + pids = temp_out.strip().split(' ') printd("pid(s) of process %s are %s " %(process_name, pids)) #there is more than one process so match the pid file @@ -181,11 +181,10 @@ def restartService(service_name): return False - - def checkProcessStatus( process ): """ Check the process running status, if not running tries to restart + Returns the process status and if it was restarted """ process_name = process.get('processname') service_name = process.get('servicename') @@ -197,13 +196,13 @@ def checkProcessStatus( process ): cmd='' if process_name is None: printd ("\n Invalid Process Name") - return StatusCodes.INVALID_INP + return StatusCodes.INVALID_INP, False status, pids = checkProcessRunningStatus(process_name, pidfile) if status == True: printd("The process is running ....") - return StatusCodes.RUNNING + return StatusCodes.RUNNING, False else: printd("Process %s is not running trying to recover" %process_name) #Retry the process state for few seconds @@ -243,138 +242,151 @@ def checkProcessStatus( process ): raisealert(Log.ALERT,process_name,msg) printd("Restart failed after number of retries") - return StatusCodes.STOPPED + return StatusCodes.STOPPED, False - return StatusCodes.RUNNING + return StatusCodes.RUNNING, True def monitProcess( processes_info ): """ Monitors the processes which got from the config file """ + checkStartTime = time.time() + service_status = {} + failing_services = [] if len( processes_info ) == 0: - printd("Invalid Input") - return StatusCodes.INVALID_INP + printd("No config items provided - means a redundant VR or a VPC Router") + return service_status, failing_services - dict_unmonit={} - umonit_update={} - unMonitPs=False - - if not path.isfile(Config.UNMONIT_PS_FILE): - printd('Unmonit File not exist') - else: - #load the dictionary with unmonit process list - dict_unmonit = loadPsFromUnMonitFile() 
+ print "[Process Info] " + json.dumps(processes_info) #time for noting process down time csec = repr(time.time()).split('.')[0] for process,properties in processes_info.items(): - #skip the process it its time stamp less than Config.MONIT_AFTER_MINS - printd ("checking the service %s \n" %process) - - if not is_emtpy(dict_unmonit): - if dict_unmonit.has_key(process): - ts = dict_unmonit[process] - - if checkPsTimeStampForMonitor (csec, ts, properties) == False: - unMonitPs = True - continue - - if checkProcessStatus( properties) != StatusCodes.RUNNING: + printd ("---------------------------\nchecking the service %s\n---------------------------- " %process) + serviceName = process + ".service" + processStatus, wasRestarted = checkProcessStatus(properties) + if processStatus != StatusCodes.RUNNING: printd( "\n Service %s is not Running"%process) - #add this process into unmonit list - printd ("updating the service for unmonit %s\n" %process) - umonit_update[process]=csec - - #if dict is not empty write to file else delete it - if not is_emtpy(umonit_update): - writePsListToUnmonitFile(umonit_update) - else: - if is_emtpy(umonit_update) and unMonitPs == False: - #delete file it is there - removeFile(Config.UNMONIT_PS_FILE) - - -def checkPsTimeStampForMonitor(csec,ts, process): - printd("Time difference=%s" %str(int(csec) - int(ts))) - tmin = (int(csec) - int(ts) )/60 - - if ( int(csec) - int(ts) )/60 < Config.MONIT_AFTER_MINS: - raisealert(Log.ALERT, "The %s get monitor after %s minutes " %(process, Config.MONIT_AFTER_MINS)) - printd('process will be monitored after %s min' %(str(int(Config.MONIT_AFTER_MINS) - tmin))) - return False - - return True - -def removeFile(fileName): - if path.isfile(fileName): - printd("Removing the file %s" %fileName) - os.remove(fileName) - -def loadPsFromUnMonitFile(): - - dict_unmonit = {} - - try: - fd = open(Config.UNMONIT_PS_FILE) - except: - printd("Failed to open file %s " %(Config.UNMONIT_PS_FILE)) - return StatusCodes.FAILED - - 
ps = fd.read() - - if not ps: - printd("File %s content is empty " %Config.UNMONIT_PS_FILE) - return StatusCodes.FAILED - - printd(ps) - plist = ps.split(',') - plist.remove('') - for i in plist: - dict_unmonit[i.split(':')[0]] = i.split(':')[1] - - fd.close() - - return dict_unmonit - - -def writePsListToUnmonitFile(umonit_update): - printd("Write updated unmonit list to file") - line='' - for i in umonit_update: - line+=str(i)+":"+str(umonit_update[i])+',' - printd(line) - try: - fd=open(Config.UNMONIT_PS_FILE,'w') - except: - printd("Failed to open file %s " %Config.UNMONIT_PS_FILE) - return StatusCodes.FAILED - - fd.write(line) - fd.close() + checkEndTime = time.time() + service_status[serviceName] = { + "success": "false", + "lastUpdate": str(int(checkStartTime * 1000)), + "lastRunDuration": str((checkEndTime - checkStartTime) * 1000), + "message": "service down at last check " + str(csec) + } + failing_services.append(serviceName) + else: + checkEndTime = time.time() + service_status[serviceName] = { + "success": "true", + "lastUpdate": str(int(checkStartTime * 1000)), + "lastRunDuration": str((checkEndTime - checkStartTime) * 1000), + "message": "service is running" + (", was restarted" if wasRestarted else "") + } + + return service_status, failing_services + + +def execute(script, checkType = "basic"): + checkStartTime = time.time() + cmd = "./" + script + " " + checkType + printd ("Executing health check script command: " + cmd) + pout = Popen(cmd, shell=True, stdout=PIPE) + output = pout.communicate()[0].strip() + exitStatus = pout.returncode + checkEndTime = time.time() -def is_emtpy(struct): - """ - Checks wether the given struct is empty or not - """ - if struct: - return False + if exitStatus == 0: + if len(output) > 0: + printd("Successful execution of " + script) + return { + "success": "true", + "lastUpdate": str(int(checkStartTime * 1000)), + "lastRunDuration": str((checkEndTime - checkStartTime) * 1000), + "message": output + } + return {} #Skip
script if no output is received else: - return True - -def main(): + printd("Script execution failed " + script) + return { + "success": "false", + "lastUpdate": str(int(checkStartTime * 1000)), + "lastRunDuration": str((checkEndTime - checkStartTime) * 1000), + "message": output + } + +def main(checkType = "basic"): + startTime = time.time() ''' - Step1 : Get Config + Step1 : Get Services Config ''' printd("monitoring started") - temp_dict = getConfig() + configDict = getServicesConfig() + + ''' + Step2: Monitor services and Raise Alerts + ''' + monitResult = {} + failingChecks = [] + if checkType == "basic": + monitResult, failingChecks = monitProcess(configDict) ''' - Step2: Monitor and Raise Alert + Step3: Run health check scripts as needed ''' - monitProcess( temp_dict ) + hc_data = getHealthChecksData() + + if hc_data is not None and "health_checks_enabled" in hc_data and hc_data['health_checks_enabled']: + hc_exclude = hc_data["excluded_health_checks"] if "excluded_health_checks" in hc_data else [] + for f in os.listdir(Config.HEALTH_CHECKS_DIR): + if f in hc_exclude: + continue + fpath = path.join(Config.HEALTH_CHECKS_DIR, f) + if path.isfile(fpath) and os.access(fpath, os.X_OK): + ret = execute(fpath, checkType) + if len(ret) == 0: + continue + if "success" in ret and ret["success"].lower() == "false": + failingChecks.append(f) + monitResult[f] = ret + + ''' + Step4: Write results to the json file for admins/management server to read + ''' + + endTime = time.time() + monitResult["lastRun"] = { + "start": str(datetime.fromtimestamp(startTime)), + "end": str(datetime.fromtimestamp(endTime)), + "duration": str(endTime - startTime) + } + + with open(checkType + "_" + Config.MONITOR_RESULT_FILE_SUFFIX, 'w') as f: + json.dump(monitResult, f, ensure_ascii=False) + + failChecksFile = checkType + "_" + Config.FAILING_CHECKS_FILE + if len(failingChecks) > 0: + fcs = "" + for fc in failingChecks: + fcs = fcs + fc + "," + fcs = fcs[0:-1] + with open(failChecksFile, 
'w') as f: + f.write(fcs) + elif path.isfile(failChecksFile): + os.remove(failChecksFile) if __name__ == "__main__": - main() + checkType = "basic" + if len(sys.argv) == 2: + if sys.argv[1] == "advanced": + main("advanced") + elif sys.argv[1] == "basic": + main("basic") + else: + printd("Error: Unknown type of test: " + sys.argv[1]) + else: + main("basic") + main("advanced") diff --git a/test/integration/component/test_routers.py b/test/integration/component/test_routers.py index 45e2853db898..196d054ed340 100644 --- a/test/integration/component/test_routers.py +++ b/test/integration/component/test_routers.py @@ -21,7 +21,8 @@ from marvin.cloudstackTestCase import cloudstackTestCase from marvin.cloudstackAPI import (stopVirtualMachine, stopRouter, - startRouter) + startRouter, + getRouterHealthCheckResults) from marvin.lib.utils import (cleanup_resources, get_process_status) from marvin.lib.base import (ServiceOffering, @@ -594,6 +595,75 @@ def test_03_RouterStartOnVmDeploy(self): return + @attr(tags=["advanced"], required_hardware="true") + def test_04_RouterHealthChecksResults(self): + """Test advanced zone router list contains health check records + """ + + routers = list_routers( + self.apiclient, + account=self.account.name, + domainid=self.account.domainid, + fetchhealthcheckresults=True + ) + + self.assertEqual(isinstance(routers, list), True, + "Check for list routers response return valid data" + ) + self.assertNotEqual( + len(routers), 0, + "Check list router response" + ) + + router = routers[0] + self.info("Router ID: %s & Router state: %s" % ( + router.id, router.state + )) + + self.assertEqual(isinstance(router.healthcheckresults, list), True, + "Router response should contain it's health check result as list" + ) + + cmd = getRouterHealthCheckResults.getRouterHealthCheckResultsCmd() + cmd.routerid = router.id + cmd.performfreshchecks = True # Perform fresh checks as a newly created router may not have results + healthData =
self.api_client.getRouterHealthCheckResults(cmd) + self.info("Router ID: %s & Router state: %s" % ( + router.id, router.state + )) + + self.assertEqual(router.id, healthData.routerid, + "Router response should contain it's health check result so id should match" + ) + self.assertEqual(isinstance(healthData.healthchecks, list), True, + "Router response should contain it's health check result as list" + ) + + self.verifyCheckTypes(healthData.healthchecks) + self.verifyCheckNames(healthData.healthchecks) + + def verifyCheckTypes(self, healthChecks): + for checkType in ["basic", "advanced"]: + foundType = False + for check in healthChecks: + if check.checktype == checkType: + foundType = True + break + self.assertTrue(foundType, + "Router should contain health check results info for type: " + checkType + ) + + def verifyCheckNames(self, healthChecks): + for checkName in ["dns_check.py", "dhcp_check.py", "haproxy_check.py", "disk_space_check.py", "iptables_check.py", "gateways_check.py", "router_version_check.py"]: + foundCheck = False + for check in healthChecks: + if check.checkname == checkName: + foundCheck = True + break + self.assertTrue(foundCheck, + "Router should contain health check results info for check name: " + checkName + ) + class TestRouterStopCreatePF(cloudstackTestCase): diff --git a/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh b/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh index 56406b711f52..e9814dd06cea 100644 --- a/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh +++ b/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh @@ -48,6 +48,7 @@ function install_cloud_scripts() { /root/{clearUsageRules.sh,reconfigLB.sh,monitorServices.py} \ /etc/profile.d/cloud.sh /etc/cron.daily/* /etc/cron.hourly/* + chmod +x /root/health_checks/* chmod -x /etc/systemd/system/* systemctl daemon-reload diff --git a/ui/l10n/en.js b/ui/l10n/en.js index 
87deba8142b2..e516725191f4 100644 --- a/ui/l10n/en.js +++ b/ui/l10n/en.js @@ -291,6 +291,8 @@ var dictionary = { "label.action.stop.instance.processing":"Stopping Instance....", "label.action.stop.router":"Stop Router", "label.action.stop.router.processing":"Stopping Router....", +"label.action.router.health.checks":"Get health checks result", +"label.perform.fresh.checks":"Perform fresh checks", "label.action.stop.systemvm":"Stop System VM", "label.action.stop.systemvm.processing":"Stopping System VM....", "label.action.take.snapshot":"Take Snapshot", @@ -579,6 +581,7 @@ var dictionary = { "label.continue":"Continue", "label.continue.basic.install":"Continue with basic installation", "label.copying.iso":"Copying ISO", +"label.copy.text": "Copy Text", "label.corrections.saved":"Corrections saved", "label.counter":"Counter", "label.cpu":"CPU", @@ -1505,6 +1508,12 @@ var dictionary = { "label.root.disk.offering":"Root Disk Offering", "label.root.disk.size":"Root disk size (GB)", "label.router.vm.scaled.up":"Router VM Scaled Up", +"label.router.health.checks":"Health Checks", +"label.router.health.check.name":"Check name", +"label.router.health.check.type":"Type", +"label.router.health.check.success":"Success", +"label.router.health.check.last.updated":"Last updated", +"label.router.health.check.details":"Details", "label.routing":"Routing", "label.routing.host":"Routing Host", "label.rule":"Rule", @@ -1972,6 +1981,7 @@ var dictionary = { "message.action.start.systemvm":"Please confirm that you want to start this system VM.", "message.action.stop.instance":"Please confirm that you want to stop this instance.", "message.action.stop.router":"All services provided by this virtual router will be interrupted. 
Please confirm that you want to stop this router.", +"message.action.router.health.checks":"Health checks result will be fetched from router.", "message.action.stop.systemvm":"Please confirm that you want to stop this system VM.", "message.action.take.snapshot":"Please confirm that you want to take a snapshot of this volume.", "message.action.unmanage.cluster":"Please confirm that you want to unmanage the cluster.", diff --git a/ui/scripts/cloudStack.js b/ui/scripts/cloudStack.js index 9b5f0117058b..97eec9ce6531 100644 --- a/ui/scripts/cloudStack.js +++ b/ui/scripts/cloudStack.js @@ -102,6 +102,64 @@ var $container = $('#cloudStack3-container'); + var updateSharedConfigs = function() { + // Update global pagesize for list APIs in UI + $.ajax({ + type: 'GET', + url: createURL('listConfigurations'), + data: {name: 'default.ui.page.size'}, + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { + var config = data.listconfigurationsresponse.configuration[0]; + if (config && config.name == 'default.ui.page.size') { + pageSize = parseInt(config.value); + } + } + }, + error: function(xhr) { // ignore any errors, fallback to the default + } + }); + + // Update global pagesize for sort key in UI + $.ajax({ + type: 'GET', + url: createURL('listConfigurations'), + data: {name: 'sortkey.algorithm'}, + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { + var config = data.listconfigurationsresponse.configuration[0]; + if (config && config.name == 'sortkey.algorithm') { + g_sortKeyIsAscending = config.value == 'true'; + } + } + }, + error: function(xhr) { // ignore any errors, fallback to the default + } + }); + + // Update global router health checks enabled + $.ajax({ + type: 'GET', + url: createURL('listConfigurations'), + data: 
{name: 'router.health.checks.enabled'}, + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { + var config = data.listconfigurationsresponse.configuration[0]; + if (config && config.name == 'router.health.checks.enabled') { + g_routerHealthChecksEnabled = config.value == 'true'; + } + } + }, + error: function(xhr) { // ignore any errors, fallback to the default + } + }); + } var loginArgs = { $container: $container, @@ -170,61 +228,25 @@ } }); - // Update global pagesize for list APIs in UI - $.ajax({ - type: 'GET', - url: createURL('listConfigurations'), - data: {name: 'default.ui.page.size'}, - dataType: 'json', - async: false, - success: function(data, textStatus, xhr) { - if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { - var config = data.listconfigurationsresponse.configuration[0]; - if (config && config.name == 'default.ui.page.size') { - pageSize = parseInt(config.value); - } - } - }, - error: function(xhr) { // ignore any errors, fallback to the default - } - }); - - // Update global pagesize for sort key in UI - $.ajax({ - type: 'GET', - url: createURL('listConfigurations'), - data: {name: 'sortkey.algorithm'}, - dataType: 'json', - async: false, - success: function(data, textStatus, xhr) { - if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { - var config = data.listconfigurationsresponse.configuration[0]; - if (config && config.name == 'sortkey.algorithm') { - g_sortKeyIsAscending = config.value == 'true'; - } - } - }, - error: function(xhr) { // ignore any errors, fallback to the default - } - }); - - // Populate IDP list - $.ajax({ - type: 'GET', - url: createURL('listIdps'), - dataType: 'json', - async: false, - success: function(data, textStatus, xhr) { - if (data && data.listidpsresponse && data.listidpsresponse.idp) { - var idpList = 
data.listidpsresponse.idp.sort(function (a, b) { - return a.orgName.localeCompare(b.orgName); - }); - g_idpList = idpList; - } - }, - error: function(xhr) { - } - }); + updateSharedConfigs() + + // Populate IDP list + $.ajax({ + type: 'GET', + url: createURL('listIdps'), + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listidpsresponse && data.listidpsresponse.idp) { + var idpList = data.listidpsresponse.idp.sort(function (a, b) { + return a.orgName.localeCompare(b.orgName); + }); + g_idpList = idpList; + } + }, + error: function(xhr) { + } + }); return userValid ? { user: { @@ -337,6 +359,7 @@ }) } }); + updateSharedConfigs(); }, error: function(xmlHTTP) { args.response.error(); @@ -377,6 +400,7 @@ g_regionsecondaryenabled = null; g_loginCmdText = null; g_allowUserViewAllDomainAccounts = null; + g_routerHealthChecksEnabled = false; // Remove any cookies var cookies = document.cookie.split(";"); diff --git a/ui/scripts/sharedFunctions.js b/ui/scripts/sharedFunctions.js index d3e6fe870bee..f8ad7bfff84b 100644 --- a/ui/scripts/sharedFunctions.js +++ b/ui/scripts/sharedFunctions.js @@ -37,6 +37,7 @@ var g_idpList = null; var g_appendIdpDomain = false; var g_sortKeyIsAscending = false; var g_allowUserViewAllDomainAccounts = false; +var g_routerHealthChecksEnabled = false; //keyboard keycode var keycode_Enter = 13; diff --git a/ui/scripts/system.js b/ui/scripts/system.js index 2ae2f466043c..b1c75a75779a 100755 --- a/ui/scripts/system.js +++ b/ui/scripts/system.js @@ -9886,6 +9886,7 @@ listView: { id: 'routers', label: 'label.virtual.appliances', + horizontalOverflow: true, fields: { name: { label: 'label.name' @@ -9914,7 +9915,19 @@ indicator: { 'Running': 'on', 'Stopped': 'off', - 'Error': 'off' + 'Error': 'off', + 'Alert': 'warning' + } + }, + healthchecksfailed: { + converter: function (str) { + if (str) return 'Failed' + return 'Passed'; + }, + label: 'label.health.check', + indicator: { + false: 'on', + true: 
'warning' } }, requiresupgrade: { @@ -9922,6 +9935,12 @@ converter: cloudStack.converters.toBooleanText } }, + preFilter: function () { + if (!g_routerHealthChecksEnabled) { + return ['healthchecksfailed'] + } + return [] + }, dataProvider: function (args) { var array1 =[]; if (args.filterBy != null) { @@ -9982,44 +10001,47 @@ routers.push(item); }); - /* - * In project view, the first listRotuers API(without projectid=-1) will return the same objects as the second listRouters API(with projectid=-1), - * because in project view, all API calls are appended with projectid=[projectID]. - * Therefore, we only call the second listRouters API(with projectid=-1) in non-project view. - */ - if (cloudStack.context && cloudStack.context.projects == null) { //non-project view - /* - * account parameter(account+domainid) and project parameter(projectid) are not allowed to be passed together to listXXXXXXX API. - * So, remove account parameter(account+domainid) from data2 - */ - if ("account" in data2) { - delete data2.account; - } - if ("domainid" in data2) { - delete data2.domainid; - } + /* + * In project view, the first listRotuers API(without projectid=-1) will return the same objects as the second listRouters API(with projectid=-1), + * because in project view, all API calls are appended with projectid=[projectID]. + * Therefore, we only call the second listRouters API(with projectid=-1) in non-project view. + */ + if (cloudStack.context && cloudStack.context.projects == null) { //non-project view + /* + * account parameter(account+domainid) and project parameter(projectid) are not allowed to be passed together to listXXXXXXX API. 
+ * So, remove account parameter(account+domainid) from data2 + */ + if ("account" in data2) { + delete data2.account; + } + if ("domainid" in data2) { + delete data2.domainid; + } - $.ajax({ - url: createURL("listRouters&listAll=true&page=" + args.page + "&pagesize=" + pageSize + array1.join("") + "&projectid=-1"), - data: data2, - async: false, - success: function (json) { - var items = json.listroutersresponse.router ? - json.listroutersresponse.router:[]; + $.ajax({ + url: createURL("listRouters&listAll=true&page=" + args.page + "&pagesize=" + pageSize + array1.join("") + "&projectid=-1"), + data: data2, + async: false, + success: function (json) { + var items = json.listroutersresponse.router ? + json.listroutersresponse.router:[]; - $(items).map(function (index, item) { - routers.push(item); - }); + var items = json.listroutersresponse.router ? + json.listroutersresponse.router:[]; + + $(items).map(function (index, item) { + routers.push(item); + }); + } + }); } - }); - } - args.response.success({ - actionFilter: routerActionfilter, - data: $(routers).map(mapRouterType) - }); - } + args.response.success({ + actionFilter: routerActionfilter, + data: $(routers).map(mapRouterType) }); + } + }); }, detailView: { name: 'label.virtual.appliance.details', @@ -10542,6 +10564,56 @@ height: 640 } } + }, + + healthChecks: { + label: 'label.action.router.health.checks', + createForm: { + title: 'label.action.router.health.checks', + desc: 'message.action.router.health.checks', + fields: { + performfreshchecks: { + label: 'label.perform.fresh.checks', + isBoolean: true + } + } + }, + action: function (args) { + if (!g_routerHealthChecksEnabled) { + cloudStack.dialog.notice({ + message: 'Router health checks are disabled. 
Please enable router.health.checks.enabled to execute this action' + }) + args.response.success() + return + } + var data = { + 'routerid': args.context.routers[0].id, + 'performfreshchecks': (args.data.performfreshchecks === 'on') + }; + $.ajax({ + url: createURL('getRouterHealthCheckResults'), + dataType: 'json', + data: data, + async: true, + success: function (json) { + var healthChecks = json.getrouterhealthcheckresultsresponse.routerhealthchecks.healthchecks + var numChecks = healthChecks.length + var failedChecks = 0 + $.each(healthChecks, function(idx, check) { + if (!check.success) failedChecks = failedChecks + 1 + }) + cloudStack.dialog.notice({ + message: 'Found ' + numChecks + ' checks for router, with ' + failedChecks + ' failing checks. Please visit router > Health Checks tab to see details' + }) + args.response.success(); + } + }); + }, + messages: { + notification: function(args) { + return 'label.action.router.health.checks' + } + } } }, tabs: { @@ -10731,6 +10803,78 @@ } }); } + }, + healthCheckResults: { + title: 'label.router.health.checks', + listView: { + id: 'routerHealthCheckResults', + label: 'label.router.health.checks', + hideToolbar: true, + fields: { + checkname: { + label: 'label.router.health.check.name' + }, + checktype: { + label: 'label.router.health.check.type' + }, + success: { + label: 'label.router.health.check.success', + converter: function (args) { + if (args) { + return _l('True'); + } else { + return _l('False'); + } + }, + indicator: { + true: 'on', + false: 'off' + } + }, + lastupdated: { + label: 'label.router.health.check.last.updated' + } + }, + actions: { + details: { + label: 'label.router.health.check.details', + action: { + custom: function (args) { + cloudStack.dialog.notice({ + message: args.context.routerHealthCheckResults[0].details + }) + } + } + } + }, + dataProvider: function(args) { + if (!g_routerHealthChecksEnabled) { + cloudStack.dialog.notice({ + message: 'Router health checks are disabled. 
Please enable router.health.checks.enabled to get data' + }) + args.response.success({}) + return + } + if (args.page > 1) { + // Only one page is supported as it's not list command. + args.response.success({}); + return + } + + $.ajax({ + url: createURL('getRouterHealthCheckResults'), + data: { + 'routerid': args.context.routers[0].id + }, + success: function (json) { + var hcData = json.getrouterhealthcheckresultsresponse.routerhealthchecks.healthchecks + args.response.success({ + data: hcData + }); + } + }); + } + } } } } @@ -22273,6 +22417,7 @@ allowedActions.push("migrate"); allowedActions.push("diagnostics"); allowedActions.push("retrieveDiagnostics"); + allowedActions.push("healthChecks"); } } else if (jsonObj.state == 'Stopped') { allowedActions.push("start"); diff --git a/ui/scripts/ui/widgets/listView.js b/ui/scripts/ui/widgets/listView.js index 91578148e281..5245c10a3faf 100644 --- a/ui/scripts/ui/widgets/listView.js +++ b/ui/scripts/ui/widgets/listView.js @@ -168,8 +168,11 @@ } else { if (needsRefresh) { var $loading = $('
').addClass('loading-overlay'); - - $listView.prepend($loading); + if ($listView) { + $listView.prepend($loading); + } else { + $instanceRow.closest('.list-view').prepend($loading) + } } var actionArgs = {