Skip to content

Commit 3ee8d83

Browse files
committed
CLOUDSTACK-10136: Fix RemoteHostEndPoint thread growth
This fixes the following: - Unchecked thread growth in RemoteEndHostEndPoint - Potential NPE while finding EP for a storage/scope Unbounded thread growth can be reproduced with following findings: - Every unreachable template would produce 6 new threads (in a single ScheduledExecutorService instance) spaced by 10 seconds - Every reachable template url without the template would produce 1 new thread (and one ScheduledExecutorService instance), it errors out quickly without causing more thread growth. - Every valid url will produce upto 10 threads as the same ep (endpoint instance) will be reused to query upload/download (async callback) progresses. Every RemoteHostEndPoint instances creates its own ScheduledExecutorService instance which is why in the jstack dump, we see several threads that share the prefix RemoteHostEndPoint-{1..10} (given poolsize is defined as 10, it uses suffixes 1-10). This fixes the discovered thread leakage with following notes: - Instead of ScheduledExecutorService instance, a cached pool could be used instead and was implemented, and with `static` scope to be reused among other future RemoteHostEndPoint instances. - It was not clear why we would want to wait when we've Answers returned from the remote EP, and therefore a scheduled/delayed Runnable was not required at all for processing answers. ScheduledExecutorService was therefore not really required, moved to ExecutorService instead. - Another benefit of using a cached pool is that it will shutdown threads if they are not used in 60 seconds, and they get re-used for future runnable submissions. - Caveat: the executor service is still unbounded, however, the use-case that this method is used for short jobs to check upload/download progresses fits the case here. - Refactored CmdRunner to not use/reference objects from parent class. Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
1 parent 9c067c0 commit 3ee8d83

2 files changed

Lines changed: 24 additions & 21 deletions

File tree

engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,15 @@
1818
*/
1919
package org.apache.cloudstack.storage;
2020

21+
import java.util.concurrent.ExecutorService;
2122
import java.util.concurrent.Executors;
22-
import java.util.concurrent.ScheduledExecutorService;
23-
import java.util.concurrent.TimeUnit;
2423

2524
import javax.inject.Inject;
2625

27-
import org.apache.log4j.Logger;
28-
2926
import org.apache.cloudstack.engine.subsystem.api.storage.EndPoint;
3027
import org.apache.cloudstack.framework.async.AsyncCompletionCallback;
3128
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
29+
import org.apache.log4j.Logger;
3230

3331
import com.cloud.agent.AgentManager;
3432
import com.cloud.agent.Listener;
@@ -54,9 +52,11 @@
5452

5553
public class RemoteHostEndPoint implements EndPoint {
5654
private static final Logger s_logger = Logger.getLogger(RemoteHostEndPoint.class);
55+
5756
private long hostId;
5857
private String hostAddress;
5958
private String publicAddress;
59+
6060
@Inject
6161
AgentManager agentMgr;
6262
@Inject
@@ -65,10 +65,10 @@ public class RemoteHostEndPoint implements EndPoint {
6565
protected SecondaryStorageVmDao vmDao;
6666
@Inject
6767
protected HostDao _hostDao;
68-
private ScheduledExecutorService executor;
68+
69+
private static ExecutorService executorService = Executors.newCachedThreadPool(new NamedThreadFactory("RemoteHostEndPoint"));
6970

7071
public RemoteHostEndPoint() {
71-
executor = Executors.newScheduledThreadPool(10, new NamedThreadFactory("RemoteHostEndPoint"));
7272
}
7373

7474
private void configure(Host host) {
@@ -134,17 +134,17 @@ public Answer sendMessage(Command cmd) {
134134
}
135135

136136
private class CmdRunner extends ManagedContextRunnable implements Listener {
137-
final AsyncCompletionCallback<Answer> callback;
138-
Answer answer;
137+
private final AsyncCompletionCallback<Answer> callback;
138+
private Answer answer;
139139

140-
public CmdRunner(AsyncCompletionCallback<Answer> callback) {
140+
CmdRunner(final AsyncCompletionCallback<Answer> callback) {
141141
this.callback = callback;
142142
}
143143

144144
@Override
145145
public boolean processAnswers(long agentId, long seq, Answer[] answers) {
146-
answer = answers[0];
147-
executor.schedule(this, 10, TimeUnit.SECONDS);
146+
this.answer = answers[0];
147+
RemoteHostEndPoint.executorService.submit(this);
148148
return true;
149149
}
150150

@@ -204,7 +204,7 @@ public boolean processTimeout(long agentId, long seq) {
204204

205205
@Override
206206
protected void runInContext() {
207-
callback.complete(answer);
207+
this.callback.complete(this.answer);
208208
}
209209
}
210210

engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,16 +106,19 @@ protected EndPoint findEndPointInScope(Scope scope, String sqlBase, Long poolId)
106106
StringBuilder sbuilder = new StringBuilder();
107107
sbuilder.append(sqlBase);
108108

109-
if (scope.getScopeType() == ScopeType.HOST) {
110-
sbuilder.append(" and h.id = ");
111-
sbuilder.append(scope.getScopeId());
112-
} else if (scope.getScopeType() == ScopeType.CLUSTER) {
113-
sbuilder.append(" and h.cluster_id = ");
114-
sbuilder.append(scope.getScopeId());
115-
} else if (scope.getScopeType() == ScopeType.ZONE) {
116-
sbuilder.append(" and h.data_center_id = ");
117-
sbuilder.append(scope.getScopeId());
109+
if (scope != null) {
110+
if (scope.getScopeType() == ScopeType.HOST) {
111+
sbuilder.append(" and h.id = ");
112+
sbuilder.append(scope.getScopeId());
113+
} else if (scope.getScopeType() == ScopeType.CLUSTER) {
114+
sbuilder.append(" and h.cluster_id = ");
115+
sbuilder.append(scope.getScopeId());
116+
} else if (scope.getScopeType() == ScopeType.ZONE) {
117+
sbuilder.append(" and h.data_center_id = ");
118+
sbuilder.append(scope.getScopeId());
119+
}
118120
}
121+
119122
// TODO: order by rand() is slow if there are lot of hosts
120123
sbuilder.append(") t where t.value<>'true' or t.value is null"); //Added for exclude cluster's subquery
121124
sbuilder.append(" ORDER by rand() limit 1");

0 commit comments

Comments
 (0)