Skip to content

Commit 136505b

Browse files
ustcweizhou authored and yadvr committed
server: double check host capacity when start/migrate a vm (#3728)
When starting a VM or migrating a VM (away from a host in maintenance), CloudStack checks the capacity of all hosts and chooses one. If there are hundreds of hosts on the platform, this can take several seconds. By the time CloudStack has chosen a host and starts/migrates the VM to it, the resource consumption of that host might have changed. This typically happens when multiple VMs are started/migrated at the same time. It is therefore better to double-check the host capacity when starting the VM on the chosen host. This PR also includes the fix for CPU core capacity accounting when starting/migrating a VM.
1 parent 71e53ab commit 136505b

2 files changed

Lines changed: 65 additions & 3 deletions

File tree

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2353,6 +2353,17 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
23532353
_networkMgr.rollbackNicForMigration(vmSrc, profile);
23542354
s_logger.info("Migration cancelled because " + e1.getMessage());
23552355
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
2356+
} catch (final CloudRuntimeException e2) {
2357+
_networkMgr.rollbackNicForMigration(vmSrc, profile);
2358+
s_logger.info("Migration cancelled because " + e2.getMessage());
2359+
work.setStep(Step.Done);
2360+
_workDao.update(work.getId(), work);
2361+
try {
2362+
stateTransitTo(vm, Event.OperationFailed, srcHostId);
2363+
} catch (final NoTransitionException e3) {
2364+
s_logger.warn(e3.getMessage());
2365+
}
2366+
throw new CloudRuntimeException("Migration cancelled because " + e2.getMessage());
23562367
}
23572368

23582369
boolean migrated = false;

server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ public boolean releaseVmCapacity(VirtualMachine vm, final boolean moveFromReserv
172172
final ServiceOfferingVO svo = _offeringsDao.findById(vm.getId(), vm.getServiceOfferingId());
173173
CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU);
174174
CapacityVO capacityMemory = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY);
175+
CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE);
175176
Long clusterId = null;
176177
if (hostId != null) {
177178
HostVO host = _hostDao.findById(hostId);
@@ -182,28 +183,34 @@ public boolean releaseVmCapacity(VirtualMachine vm, final boolean moveFromReserv
182183

183184
clusterId = host.getClusterId();
184185
}
185-
if (capacityCpu == null || capacityMemory == null || svo == null) {
186+
if (capacityCpu == null || capacityMemory == null || svo == null || capacityCpuCore == null) {
186187
return false;
187188
}
188189

189190
try {
190191
final Long clusterIdFinal = clusterId;
191192
final long capacityCpuId = capacityCpu.getId();
192193
final long capacityMemoryId = capacityMemory.getId();
194+
final long capacityCpuCoreId = capacityCpuCore.getId();
195+
193196
Transaction.execute(new TransactionCallbackNoReturn() {
194197
@Override
195198
public void doInTransactionWithoutResult(TransactionStatus status) {
196199
CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true);
197200
CapacityVO capacityMemory = _capacityDao.lockRow(capacityMemoryId, true);
201+
CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true);
198202

199203
long usedCpu = capacityCpu.getUsedCapacity();
200204
long usedMem = capacityMemory.getUsedCapacity();
205+
long usedCpuCore = capacityCpuCore.getUsedCapacity();
201206
long reservedCpu = capacityCpu.getReservedCapacity();
202207
long reservedMem = capacityMemory.getReservedCapacity();
208+
long reservedCpuCore = capacityCpuCore.getReservedCapacity();
203209
long actualTotalCpu = capacityCpu.getTotalCapacity();
204210
float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "cpuOvercommitRatio").getValue());
205211
float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "memoryOvercommitRatio").getValue());
206212
int vmCPU = svo.getCpu() * svo.getSpeed();
213+
int vmCPUCore = svo.getCpu();
207214
long vmMem = svo.getRamSize() * 1024L * 1024L;
208215
long actualTotalMem = capacityMemory.getTotalCapacity();
209216
long totalMem = (long)(actualTotalMem * memoryOvercommitRatio);
@@ -221,6 +228,9 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
221228
if (usedMem >= vmMem) {
222229
capacityMemory.setUsedCapacity(usedMem - vmMem);
223230
}
231+
if (usedCpuCore >= vmCPUCore) {
232+
capacityCpuCore.setUsedCapacity(usedCpuCore - vmCPUCore);
233+
}
224234

225235
if (moveToReservered) {
226236
if (reservedCpu + vmCPU <= totalCpu) {
@@ -229,6 +239,7 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
229239
if (reservedMem + vmMem <= totalMem) {
230240
capacityMemory.setReservedCapacity(reservedMem + vmMem);
231241
}
242+
capacityCpuCore.setReservedCapacity(reservedCpuCore + vmCPUCore);
232243
}
233244
} else {
234245
if (reservedCpu >= vmCPU) {
@@ -237,6 +248,9 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
237248
if (reservedMem >= vmMem) {
238249
capacityMemory.setReservedCapacity(reservedMem - vmMem);
239250
}
251+
if (reservedCpuCore >= vmCPUCore) {
252+
capacityCpuCore.setReservedCapacity(reservedCpuCore - vmCPUCore);
253+
}
240254
}
241255

242256
s_logger.debug("release cpu from host: " + hostId + ", old used: " + usedCpu + ",reserved: " + reservedCpu + ", actual total: " + actualTotalCpu +
@@ -249,6 +263,7 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
249263

250264
_capacityDao.update(capacityCpu.getId(), capacityCpu);
251265
_capacityDao.update(capacityMemory.getId(), capacityMemory);
266+
_capacityDao.update(capacityCpuCore.getId(), capacityCpuCore);
252267
}
253268
});
254269

@@ -263,8 +278,9 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
263278
@Override
264279
public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
265280

281+
final long vmId = vm.getId();
266282
final long hostId = vm.getHostId();
267-
HostVO host = _hostDao.findById(hostId);
283+
final HostVO host = _hostDao.findById(hostId);
268284
final long clusterId = host.getClusterId();
269285
final float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue());
270286
final float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue());
@@ -273,28 +289,35 @@ public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
273289

274290
CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU);
275291
CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY);
292+
CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE);
276293

277-
if (capacityCpu == null || capacityMem == null || svo == null) {
294+
if (capacityCpu == null || capacityMem == null || svo == null || capacityCpuCore == null) {
278295
return;
279296
}
280297

281298
final int cpu = svo.getCpu() * svo.getSpeed();
299+
final int cpucore = svo.getCpu();
300+
final int cpuspeed = svo.getSpeed();
282301
final long ram = svo.getRamSize() * 1024L * 1024L;
283302

284303
try {
285304
final long capacityCpuId = capacityCpu.getId();
286305
final long capacityMemId = capacityMem.getId();
306+
final long capacityCpuCoreId = capacityCpuCore.getId();
287307

288308
Transaction.execute(new TransactionCallbackNoReturn() {
289309
@Override
290310
public void doInTransactionWithoutResult(TransactionStatus status) {
291311
CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true);
292312
CapacityVO capacityMem = _capacityDao.lockRow(capacityMemId, true);
313+
CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true);
293314

294315
long usedCpu = capacityCpu.getUsedCapacity();
295316
long usedMem = capacityMem.getUsedCapacity();
317+
long usedCpuCore = capacityCpuCore.getUsedCapacity();
296318
long reservedCpu = capacityCpu.getReservedCapacity();
297319
long reservedMem = capacityMem.getReservedCapacity();
320+
long reservedCpuCore = capacityCpuCore.getReservedCapacity();
298321
long actualTotalCpu = capacityCpu.getTotalCapacity();
299322
long actualTotalMem = capacityMem.getTotalCapacity();
300323
long totalCpu = (long)(actualTotalCpu * cpuOvercommitRatio);
@@ -313,6 +336,7 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
313336
}
314337
capacityCpu.setUsedCapacity(usedCpu + cpu);
315338
capacityMem.setUsedCapacity(usedMem + ram);
339+
capacityCpuCore.setUsedCapacity(usedCpuCore + cpucore);
316340

317341
if (fromLastHost) {
318342
/* alloc from reserved */
@@ -324,6 +348,7 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
324348
if (reservedCpu >= cpu && reservedMem >= ram) {
325349
capacityCpu.setReservedCapacity(reservedCpu - cpu);
326350
capacityMem.setReservedCapacity(reservedMem - ram);
351+
capacityCpuCore.setReservedCapacity(reservedCpuCore - cpucore);
327352
}
328353
} else {
329354
/* alloc from free resource */
@@ -343,12 +368,38 @@ public void doInTransactionWithoutResult(TransactionStatus status) {
343368
totalMem + "; new used: " + capacityMem.getUsedCapacity() + ", reserved: " + capacityMem.getReservedCapacity() + "; requested mem: " + ram +
344369
",alloc_from_last:" + fromLastHost);
345370

371+
long cluster_id = host.getClusterId();
372+
ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio");
373+
ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio");
374+
Float cpuOvercommitRatio = Float.parseFloat(cluster_detail_cpu.getValue());
375+
Float memoryOvercommitRatio = Float.parseFloat(cluster_detail_ram.getValue());
376+
377+
boolean hostHasCpuCapability, hostHasCapacity = false;
378+
hostHasCpuCapability = checkIfHostHasCpuCapability(host.getId(), cpucore, cpuspeed);
379+
380+
if (hostHasCpuCapability) {
381+
// first check from reserved capacity
382+
hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, true, cpuOvercommitRatio, memoryOvercommitRatio, true);
383+
384+
// if not reserved, check the free capacity
385+
if (!hostHasCapacity)
386+
hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, false, cpuOvercommitRatio, memoryOvercommitRatio, true);
387+
}
388+
389+
if (!hostHasCapacity || !hostHasCpuCapability) {
390+
throw new CloudRuntimeException("Host does not have enough capacity for vm " + vmId);
391+
}
392+
346393
_capacityDao.update(capacityCpu.getId(), capacityCpu);
347394
_capacityDao.update(capacityMem.getId(), capacityMem);
395+
_capacityDao.update(capacityCpuCore.getId(), capacityCpuCore);
348396
}
349397
});
350398
} catch (Exception e) {
351399
s_logger.error("Exception allocating VM capacity", e);
400+
if (e instanceof CloudRuntimeException) {
401+
throw e;
402+
}
352403
return;
353404
}
354405
}

0 commit comments

Comments
 (0)