Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ public void beforeTakeLiveSnapshotsOnVolumes(CreateVolumesSnapshotOverlayInnerMs
logger.info(String.format("take snapshots for volumes[%s] on %s",
msg.getLockedVolumeUuids(), getClass().getCanonicalName()));

List<VolumeSnapshotInventory> inventories = Collections.synchronizedList(new ArrayList<>());
ErrorCodeList errList = new ErrorCodeList();
new While<>(cephStructs).all((struct, whileCompletion) -> {
VolumeSnapshotVO vo = Q.New(VolumeSnapshotVO.class).eq(VolumeSnapshotVO_.uuid, struct.getResourceUuid()).find();
Expand Down Expand Up @@ -779,6 +780,7 @@ public void run(MessageReply reply) {
dbf.update(vo);

struct.getVolumeSnapshotStruct().setCurrent(treply.getInventory());
inventories.add(treply.getInventory());
whileCompletion.done();
}
});
Expand All @@ -787,6 +789,17 @@ public void run(MessageReply reply) {
public void done(ErrorCodeList errorCodeList) {
if (!errList.getCauses().isEmpty()) {
completion.fail(errList.getCauses().get(0));

inventories.forEach(snapshot -> {
VolumeSnapshotDeletionMsg msg = new VolumeSnapshotDeletionMsg();
msg.setSnapshotUuid(snapshot.getUuid());
msg.setTreeUuid(snapshot.getTreeUuid());
msg.setVolumeUuid(snapshot.getVolumeUuid());
msg.setScope(DeleteVolumeSnapshotScope.Single.toString());
msg.setDirection(DeleteVolumeSnapshotDirection.Commit.toString());
bus.makeTargetServiceIdByResourceUuid(msg, VolumeSnapshotConstant.SERVICE_ID, snapshot.getUuid());
bus.send(msg);
});
Comment on lines +792 to +802
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

建议为快照清理操作添加错误处理和日志记录

当前的清理逻辑采用"发送后不管"(fire-and-forget)方式,存在以下风险:

  1. 无法验证快照删除是否成功
  2. 如果删除失败,可能导致孤立快照残留
  3. 缺少日志记录,难以追踪清理操作的执行情况

建议改进:

                 if (!errList.getCauses().isEmpty()) {
                     completion.fail(errList.getCauses().get(0));
 
+                    logger.warn(String.format("snapshot creation failed, cleaning up %d successfully created snapshots", inventories.size()));
                     inventories.forEach(snapshot -> {
                         VolumeSnapshotDeletionMsg msg = new VolumeSnapshotDeletionMsg();
                         msg.setSnapshotUuid(snapshot.getUuid());
                         msg.setTreeUuid(snapshot.getTreeUuid());
                         msg.setVolumeUuid(snapshot.getVolumeUuid());
                         msg.setScope(DeleteVolumeSnapshotScope.Single.toString());
                         msg.setDirection(DeleteVolumeSnapshotDirection.Commit.toString());
                         bus.makeTargetServiceIdByResourceUuid(msg, VolumeSnapshotConstant.SERVICE_ID, snapshot.getUuid());
-                        bus.send(msg);
+                        bus.send(msg, new CloudBusCallBack(null) {
+                            @Override
+                            public void run(MessageReply reply) {
+                                if (!reply.isSuccess()) {
+                                    logger.warn(String.format("failed to delete snapshot[uuid:%s] during cleanup: %s", 
+                                        snapshot.getUuid(), reply.getError()));
+                                } else {
+                                    logger.debug(String.format("successfully deleted snapshot[uuid:%s] during cleanup", 
+                                        snapshot.getUuid()));
+                                }
+                            }
+                        });
                     });
                     return;
                 }

这样可以:

  • 记录清理操作的执行情况
  • 捕获并记录删除失败的错误
  • 提高系统的可观测性和可维护性

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In
plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageFactory.java
around lines 792 to 802, the snapshot deletion operation uses a fire-and-forget
approach by calling bus.send(msg) without handling responses or adding logging,
which makes it impossible to verify deletion success or diagnose failures.
Modify this code to add error handling and logging: replace the simple
bus.send(msg) call with bus.send(msg, new AbstractMessageListener() {...}) to
add a response handler that logs successful deletions and captures any errors
that occur, ensuring you also add a log statement before sending to record that
the deletion operation was initiated.

return;
}
completion.success();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,7 @@ public void beforeTakeLiveSnapshotsOnVolumes(CreateVolumesSnapshotOverlayInnerMs
logger.info(String.format("take snapshots for volumes[%s] on %s",
msg.getLockedVolumeUuids(), getClass().getCanonicalName()));

List<VolumeSnapshotInventory> inventories = Collections.synchronizedList(new ArrayList<>());
ErrorCodeList errList = new ErrorCodeList();
new While<>(storageSnapshots).all((struct, whileCompletion) -> {
VolumeSnapshotVO vo = Q.New(VolumeSnapshotVO.class).eq(VolumeSnapshotVO_.uuid, struct.getResourceUuid()).find();
Expand Down Expand Up @@ -574,6 +575,7 @@ public void run(MessageReply reply) {
dbf.update(vo);

struct.getVolumeSnapshotStruct().setCurrent(treply.getInventory());
inventories.add(treply.getInventory());
whileCompletion.done();
}
});
Expand All @@ -582,6 +584,18 @@ public void run(MessageReply reply) {
public void done(ErrorCodeList errorCodeList) {
if (!errList.getCauses().isEmpty()) {
completion.fail(errList.getCauses().get(0));

inventories.forEach(snapshot -> {
VolumeSnapshotDeletionMsg msg = new VolumeSnapshotDeletionMsg();
msg.setSnapshotUuid(snapshot.getUuid());
msg.setTreeUuid(snapshot.getTreeUuid());
msg.setVolumeUuid(snapshot.getVolumeUuid());
msg.setScope(DeleteVolumeSnapshotScope.Single.toString());
msg.setDirection(DeleteVolumeSnapshotDirection.Commit.toString());
bus.makeTargetServiceIdByResourceUuid(msg, VolumeSnapshotConstant.SERVICE_ID, snapshot.getUuid());
bus.send(msg);
});

return;
}
completion.success();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import javax.persistence.Tuple;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;


Expand Down Expand Up @@ -74,6 +75,8 @@ public APIMessage intercept(APIMessage msg) throws ApiMessageInterceptionExcepti
validate((APIBatchDeleteVolumeSnapshotMsg) msg);
} else if (msg instanceof APIRevertVmFromSnapshotGroupMsg) {
validate((APIRevertVmFromSnapshotGroupMsg) msg);
} else if (msg instanceof APIDeleteVolumeSnapshotGroupMsg) {
validate((APIDeleteVolumeSnapshotGroupMsg) msg);
}

setServiceId(msg);
Expand Down Expand Up @@ -217,4 +220,29 @@ private void validate(APIBatchDeleteVolumeSnapshotMsg msg) {
throw new ApiMessageInterceptionException(operr("can not find volume uuid for snapshosts[uuid: %s]", msg.getUuids()));
}
}

private void validate(APIDeleteVolumeSnapshotGroupMsg msg) {
VolumeSnapshotGroupVO groupVO = dbf.findByUuid(msg.getUuid(), VolumeSnapshotGroupVO.class);
// 获取当前虚拟机所有内存快照
// 检测内存快照是否完整
// 1 完整 允许删除
// 2 不完整 不允许删除
List<VolumeSnapshotGroupVO> groups = Q.New(VolumeSnapshotGroupVO.class)
.eq(VolumeSnapshotGroupVO_.vmInstanceUuid, groupVO.getVmInstanceUuid())
.orderByAsc(VolumeSnapshotGroupVO_.createDate)
.list();

final String[] ungroupUuid = new String[1];
groups.forEach(group -> {
Set<VolumeSnapshotGroupRefVO> volumeSnapshotRefs = group.getVolumeSnapshotRefs();
volumeSnapshotRefs.forEach(ref -> {
if (ref.isSnapshotDeleted()) {
ungroupUuid[0] = group.getUuid();
}
});
});
if (ungroupUuid[0] != null) {
throw new ApiMessageInterceptionException(argerr("volume snapshot group[uuid:%s] is not complete, cannot delete volume snapshot group", ungroupUuid[0]));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2088,21 +2088,37 @@ private void ungroupAfterDeleted(List<VolumeSnapshotInventory> snapshots) {
List<String> uuids = snapshots.stream().map(VolumeSnapshotInventory::getUuid).collect(Collectors.toList());
SQL.New(VolumeSnapshotGroupRefVO.class).in(VolumeSnapshotGroupRefVO_.volumeSnapshotUuid, uuids)
.set(VolumeSnapshotGroupRefVO_.snapshotDeleted, true).update();
if (currentRoot.getVolumeType().equals(VolumeType.Root.toString())) {
List<String> groupUuids = new ArrayList<>();
for (VolumeSnapshotInventory snapshot : snapshots) {
String groupUuid = snapshot.getGroupUuid();
if (groupUuid != null) {
logger.debug(String.format("root volume snapshot[uuid:%s, name:%s] has been deleted, " +
"ungroup snapshot group[uuid:%s]", snapshot.getUuid(), snapshot.getName(), groupUuid));
groupUuids.add(groupUuid);
}

}
List<String> groupUuids = snapshots.stream().map(VolumeSnapshotInventory::getGroupUuid).filter(Objects::nonNull).collect(Collectors.toList());
if (groupUuids.isEmpty()) {
return;
}

List<VolumeSnapshotGroupVO> groupVOs = Q.New(VolumeSnapshotGroupVO.class).in(VolumeSnapshotGroupVO_.uuid, groupUuids).list();
groupVOs.forEach(groupVO -> {
new RunInQueue(String.format("ungroup-volumeSnapshotGroup-%s", groupVO.getUuid()), thdf, 1)
.name("ungroup-volumeSnapshotGroup-in-queue").asyncBackup(null)
.run(chain -> ungroupAfterDeleted(groupVO, new NoErrorCompletion(chain) {
@Override
public void done() {
chain.next();
}
})
);
});
}

groupUuids.forEach(groupUuid -> vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupUuid));
dbf.removeByPrimaryKeys(groupUuids, VolumeSnapshotGroupVO.class);
private void ungroupAfterDeleted(VolumeSnapshotGroupVO groupVO, NoErrorCompletion completion) {
if (!groupVO.getVolumeSnapshotRefs().stream().allMatch(VolumeSnapshotGroupRefVO::isSnapshotDeleted)) {
logger.debug(String.format("skipping ungroup operation for snapshot group[uuid:%s]: " +
"no group meet deletion criteria (due to remaining volume snapshots).", groupVO.getUuid()));
completion.done();
return;
}
logger.debug(String.format("snapshot group[uuid:%s] all volume snapshot has been deleted, delete snapshot group", groupVO.getUuid()));
vidm.deleteArchiveVmInstanceResourceMetadataGroup(groupVO.getUuid());
dbf.removeByPrimaryKey(groupVO.getUuid(), VolumeSnapshotGroupVO.class);
completion.done();
}

private List<VolumeSnapshotBackupStorageDeletionMsg> makeVolumeSnapshotBackupStorageDeletionMsg(List<String> bsUuids) {
Expand Down