diff --git a/hw-misc-psp-support-live-migrate-for-vpsp-device.patch b/1169-hw-misc-psp-support-live-migrate-for-vpsp-device.patch similarity index 100% rename from hw-misc-psp-support-live-migrate-for-vpsp-device.patch rename to 1169-hw-misc-psp-support-live-migrate-for-vpsp-device.patch diff --git a/1170-hw-vfio-hct-support-start-with-ccp.ko-driver.patch b/1170-hw-vfio-hct-support-start-with-ccp.ko-driver.patch new file mode 100644 index 0000000000000000000000000000000000000000..b65af26478c98de15659baeeaf156299def205e8 --- /dev/null +++ b/1170-hw-vfio-hct-support-start-with-ccp.ko-driver.patch @@ -0,0 +1,372 @@ +From e543cce822049c80176816d86779d298efba03b5 Mon Sep 17 00:00:00 2001 +From: Yabin Li +Date: Thu, 24 Oct 2024 20:30:46 +0800 +Subject: [PATCH 1/5] hw/vfio/hct: support start with ccp.ko driver + +Change-Id: I32df2111871129b4042db89216c746e116c69e03 +Signed-off-by: Yabin Li +--- + hw/vfio/hct.c | 190 +++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 156 insertions(+), 34 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index f4531b05e..300832901 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include "qemu/osdep.h" + #include "qemu/queue.h" +@@ -31,19 +32,25 @@ + + #define MAX_CCP_CNT 48 + #define DEF_CCP_CNT_MAX 16 ++#define MAX_HW_QUEUES 5 + #define PAGE_SIZE 4096 + #define HCT_SHARED_MEMORY_SIZE (PAGE_SIZE * MAX_CCP_CNT) + #define CCP_INDEX_BYTES 4 + #define PATH_MAX 4096 + #define TYPE_HCT_DEV "hct" + #define PCI_HCT_DEV(obj) OBJECT_CHECK(HCTDevState, (obj), TYPE_HCT_DEV) +-#define HCT_MMIO_SIZE (1 << 20) + #define HCT_MAX_PASID (1 << 8) + + #define PCI_VENDOR_ID_HYGON_CCP 0x1d94 + #define PCI_DEVICE_ID_HYGON_CCP 0x1468 + ++#define VFIO_DEVICE_CCP_SET_MODE _IO(VFIO_TYPE, VFIO_BASE + 32) ++#define VFIO_DEVICE_CCP_GET_MODE _IO(VFIO_TYPE, VFIO_BASE + 33) ++ + #define HCT_SHARE_DEV "/dev/hct_share" ++#define CCP_SHARE_DEV "/dev/ccp_share" ++#define 
PCI_DRV_HCT_DIR "/sys/bus/pci/drivers/hct" ++#define PCI_DRV_CCP_DIR "/sys/bus/pci/drivers/ccp" + + #define DEF_VERSION_STRING "0.1" + #define HCT_VERSION_STR_02 "0.2" +@@ -80,6 +87,7 @@ static volatile struct hct_data { + uint8_t hct_version[VERSION_SIZE]; + uint8_t ccp_index[MAX_CCP_CNT]; + uint8_t ccp_cnt; ++ uint8_t driver; + } hct_data; + + typedef struct SharedDevice { +@@ -93,7 +101,9 @@ typedef struct HctDevState { + MemoryRegion mmio; + MemoryRegion shared; + MemoryRegion pasid; ++ uint64_t map_size[PCI_NUM_REGIONS]; + void *maps[PCI_NUM_REGIONS]; ++ char *ccp_dev_path; + } HCTDevState; + + struct hct_dev_ctrl { +@@ -111,12 +121,23 @@ struct hct_dev_ctrl { + }; + }; + ++enum ccp_dev_used_mode { ++ _KERNEL_SPACE_USED = 0, ++ _USER_SPACE_USED, ++}; ++ + enum MDEV_USED_TYPE { + MDEV_USED_FOR_HOST, + MDEV_USED_FOR_VM, + MDEV_USED_UNDEF + }; + ++enum hct_ccp_driver_mode_type { ++ HCT_CCP_DRV_MOD_UNINIT = 0, ++ HCT_CCP_DRV_MOD_HCT, ++ HCT_CCP_DRV_MOD_CCP, ++}; ++ + static int hct_get_sysfs_value(const char *path, int *val) + { + FILE *fp = NULL; +@@ -193,25 +214,30 @@ static const MemoryRegionOps hct_mmio_ops = { + + static void vfio_hct_put_device(HCTDevState *state) + { +- g_free(state->vdev.name); + vfio_put_base_device(&state->vdev); +- return; + } + + static void vfio_hct_exit(PCIDevice *dev) + { + HCTDevState *state = PCI_HCT_DEV(dev); + +- vfio_hct_put_device(state); +- vfio_put_group(state->vdev.group); ++ if (hct_data.driver == HCT_CCP_DRV_MOD_HCT) { ++ vfio_hct_put_device(state); ++ vfio_put_group(state->vdev.group); ++ } + if (hct_data.hct_fd) { + qemu_close(hct_data.hct_fd); + hct_data.hct_fd = 0; + } ++ if (state->vdev.fd) { ++ qemu_close(state->vdev.fd); ++ state->vdev.fd = 0; ++ } + } + + static Property vfio_hct_properties[] = { + DEFINE_PROP_STRING("sysfsdev", HCTDevState, vdev.sysfsdev), ++ DEFINE_PROP_STRING("path", HCTDevState, ccp_dev_path), + DEFINE_PROP_END_OF_LIST(), + }; + +@@ -227,13 +253,16 @@ struct VFIODeviceOps vfio_ccp_ops = { + 
static void vfio_hct_get_device(VFIOGroup *group, HCTDevState *state, + Error **errp) + { +- if (vfio_get_device(group, state->vdev.name, &state->vdev, errp)) +- return; ++ char *mdevid = NULL; + +- state->vdev.ops = &vfio_ccp_ops; +- state->vdev.dev = &state->sdev.dev.qdev; ++ mdevid = g_path_get_basename(state->vdev.sysfsdev); ++ state->vdev.name = g_strdup_printf("%s", mdevid); + +- return; ++ if (vfio_get_device(group, state->vdev.name, &state->vdev, errp) == 0) { ++ state->vdev.ops = &vfio_ccp_ops; ++ state->vdev.dev = &state->sdev.dev.qdev; ++ } ++ g_free(state->vdev.name); + } + + static VFIOGroup *vfio_hct_get_group(HCTDevState *state, Error **errp) +@@ -282,14 +311,15 @@ static int vfio_hct_region_mmap(HCTDevState *state) + error_report("vfio mmap fail\n"); + goto out; + } ++ state->map_size[i] = info->size; + } + g_free(info); + } + + memory_region_init_io(&state->mmio, OBJECT(state), &hct_mmio_ops, +- state, "hct mmio", HCT_MMIO_SIZE); ++ state, "hct mmio", state->map_size[HCT_REG_BAR_IDX]); + memory_region_init_ram_device_ptr(&state->mmio, OBJECT(state), +- "hct mmio", HCT_MMIO_SIZE, ++ "hct mmio", state->map_size[HCT_REG_BAR_IDX], + state->maps[HCT_REG_BAR_IDX]); + + memory_region_init_io(&state->shared, OBJECT(state), &hct_mmio_ops, +@@ -330,11 +360,67 @@ static int hct_check_duplicated_index(int index) + return 0; + } + ++static int hct_ccp_dev_get_index(HCTDevState *state) ++{ ++ char fpath[PATH_MAX] = {0}; ++ char *ptr = NULL; ++ uint32_t loops= 0; ++ uint32_t max_loops = 10000; ++ int ccp_idx; ++ int fd; ++ int ret; ++ ++ if (!state->ccp_dev_path) { ++ error_report("state->ccp_dev_path is NULL."); ++ return -1; ++ } ++ ++ ptr = strstr(state->ccp_dev_path, "ccp"); ++ if (!ptr) ++ return -1; ++ ++ ccp_idx = atoi(ptr + strlen("ccp")); ++ if (hct_check_duplicated_index(ccp_idx)) ++ return -1; ++ ++ fd = qemu_open_old(state->ccp_dev_path, O_RDWR); ++ if (fd < 0) { ++ error_report("fail to open %s, errno %d.", fpath, errno); ++ return -1; ++ } ++ ++ 
while ((ret = ioctl(fd, VFIO_DEVICE_CCP_SET_MODE, _USER_SPACE_USED)) < 0 ++ && errno == EAGAIN) { ++ if (++loops > max_loops) { ++ error_report("loops = %u, configure user mode fail.\n", loops); ++ break; ++ } ++ usleep(10); ++ } ++ if (ret < 0) { ++ error_report("configure user mode for %s fail, errno %d", fpath, errno); ++ close(fd); ++ return -1; ++ } ++ ++ state->vdev.fd = fd; ++ state->sdev.shared_memory_offset = ccp_idx; ++ return 0; ++} ++ + static int hct_get_ccp_index(HCTDevState *state) + { + char path[PATH_MAX] = {0}; + int mdev_used, index; + ++ if (hct_data.driver == HCT_CCP_DRV_MOD_CCP) ++ return hct_ccp_dev_get_index(state); ++ ++ if (!state->vdev.sysfsdev) { ++ error_report("state->vdev.sysfsdev is NULL."); ++ return -1; ++ } ++ + if (memcmp((void *)hct_data.hct_version, HCT_VERSION_STR_06, + sizeof(HCT_VERSION_STR_06)) >= 0) { + snprintf(path, PATH_MAX, "%s/vendor/use", state->vdev.sysfsdev); +@@ -477,6 +563,31 @@ static MemoryListener hct_memory_listener = { + .region_del = hct_listener_region_del, + }; + ++static int hct_get_used_driver_walk(const char *path) ++{ ++ const char filter[] = "0000:*"; ++ struct dirent *e = NULL; ++ DIR *dir = NULL; ++ int ret = -EINVAL; ++ ++ dir = opendir(path); ++ if (dir == NULL) ++ return -1; ++ ++ while ((e = readdir(dir)) != NULL) { ++ if (e->d_name[0] == '.') ++ continue; ++ ++ if (fnmatch(filter, e->d_name, 0) == 0) { ++ ret = 0; ++ break; ++ } ++ } ++ ++ closedir(dir); ++ return ret; ++} ++ + static void hct_data_uninit(HCTDevState *state) + { + if (hct_data.hct_fd) { +@@ -484,6 +595,11 @@ static void hct_data_uninit(HCTDevState *state) + hct_data.hct_fd = 0; + } + ++ if (state->vdev.fd) { ++ qemu_close(state->vdev.fd); ++ state->vdev.fd = 0; ++ } ++ + if (hct_data.pasid) { + hct_data.pasid = 0; + } +@@ -503,16 +619,25 @@ static void hct_data_uninit(HCTDevState *state) + + static int hct_data_init(HCTDevState *state) + { ++ const char *hct_shr_name = NULL; + int ret; + + if (hct_data.init == 0) { ++ ret = 
hct_get_used_driver_walk(PCI_DRV_HCT_DIR); ++ if (ret == 0) { ++ hct_data.driver = HCT_CCP_DRV_MOD_HCT; ++ hct_shr_name = HCT_SHARE_DEV; ++ } else { ++ hct_data.driver = HCT_CCP_DRV_MOD_CCP; ++ hct_shr_name = CCP_SHARE_DEV; ++ } + +- hct_data.hct_fd = qemu_open_old(HCT_SHARE_DEV, O_RDWR); ++ hct_data.hct_fd = qemu_open_old(hct_shr_name, O_RDWR); + if (hct_data.hct_fd < 0) { +- error_report("fail to open %s, errno %d.", HCT_SHARE_DEV, errno); ++ error_report("fail to open %s, errno %d.", hct_shr_name, errno); + ret = -errno; + goto out; +- } ++ } + + /* The hct.ko version number needs not to be less than 0.2. */ + ret = hct_api_version_check(); +@@ -556,33 +681,28 @@ out: + static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) + { + int ret; +- char *mdevid; + Error *err = NULL; +- VFIOGroup *group; ++ VFIOGroup *group = NULL; + HCTDevState *state = PCI_HCT_DEV(pci_dev); + +- /* parsing mdev device name from startup scripts */ +- mdevid = g_path_get_basename(state->vdev.sysfsdev); +- state->vdev.name = g_strdup_printf("%s", mdevid); +- + ret = hct_data_init(state); + if (ret < 0) { + error_setg(errp, "hct data initialization failed."); +- g_free(state->vdev.name); + goto out; + } + +- group = vfio_hct_get_group(state, &err); +- if (!group) { +- error_setg(errp, "hct get vfio iommu_group failed."); +- g_free(state->vdev.name); +- goto data_uninit_out; +- } ++ if (hct_data.driver == HCT_CCP_DRV_MOD_HCT) { ++ group = vfio_hct_get_group(state, &err); ++ if (!group) { ++ error_setg(errp, "hct get vfio iommu_group failed."); ++ goto data_uninit_out; ++ } + +- vfio_hct_get_device(group, state, &err); +- if (err) { +- error_setg(errp, "hct get vfio device information failed."); +- goto put_group_out; ++ vfio_hct_get_device(group, state, &err); ++ if (err) { ++ error_setg(errp, "hct get vfio device information failed."); ++ goto put_group_out; ++ } + } + + ret = vfio_hct_region_mmap(state); +@@ -595,10 +715,12 @@ static void vfio_hct_realize(PCIDevice *pci_dev, 
Error **errp) + return; + + put_device_out: +- vfio_hct_put_device(state); ++ if (group) ++ vfio_hct_put_device(state); + + put_group_out: +- vfio_put_group(group); ++ if (group) ++ vfio_put_group(group); + + data_uninit_out: + hct_data_uninit(state); +-- +2.43.5 + diff --git a/1171-hw-vfio-hct-support-vfio-pci-multiple-processes.patch b/1171-hw-vfio-hct-support-vfio-pci-multiple-processes.patch new file mode 100644 index 0000000000000000000000000000000000000000..0338755b707f9fe9aa286ecbb58b824a5f9a1816 --- /dev/null +++ b/1171-hw-vfio-hct-support-vfio-pci-multiple-processes.patch @@ -0,0 +1,1788 @@ +From bfad33c566f51e9aaa9c3dbfcbb452c0d85e5bb5 Mon Sep 17 00:00:00 2001 +From: Xiangyu Xu +Date: Wed, 30 Jul 2025 14:41:10 +0800 +Subject: [PATCH 2/5] hw/vfio/hct: support vfio-pci multiple processes. + +Change-Id: Idea22bcd7583b374803fa67cd2e0ab70a42a3131 +--- + hw/vfio/hct.c | 1566 +++++++++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 1465 insertions(+), 101 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 300832901..75250587b 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -12,8 +12,12 @@ + #include + #include + #include ++#include ++#include + #include + #include ++#include ++#include + + #include "qemu/osdep.h" + #include "qemu/queue.h" +@@ -30,6 +34,181 @@ + #include "qapi/error.h" + #include "hw/qdev-properties.h" + ++// ======================== g_id API ==================== ++ ++#define HCT_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) ++#define HCT_BITMAP_SIZE(nr) HCT_DIV_ROUND_UP(nr, CHAR_BIT * sizeof(unsigned long)) ++ ++static unsigned long g_id = 0; ++ ++enum { ++ BITS_PER_WORD = sizeof(unsigned long) * CHAR_BIT ++}; ++#define WORD_OFFSET(b) ((b) / BITS_PER_WORD) ++#define BIT_OFFSET(b) ((b) % BITS_PER_WORD) ++ ++#ifndef MAX_PATH ++#define MAX_PATH 4096 ++#endif ++ ++/* ++ * Each HCT and QEMU process allocates a unique GID through the shared memory hct_gid_bitmap. 
++ * The HCT process uses bits 0-1023 of the bitmap, while the QEMU process uses bits 1024-2047. ++ * After a QEMU process allocates a bit_pos from the bitmap, it first locks the range of bytes ++ * from bit_pos * 8 to (bit_pos + 1) * 8 in the shared memory hct_gid_locks. Then it calculates ++ * the GID: ++ * bit_pos is incremented by 1 (since GID cannot be 0), then subtracted by 1024 (to correct ++ * for the starting offset of the bitmap), resulting in a number in the range of 1-1024. ++ * This number is then left-shifted by HCT_QEMU_GIDS_SHIFT_BITS (18) to obtain the final GID. ++ */ ++#define HCT_GID_BITMAP_SHM_NAME "hct_gid_bitmap" ++#define HCT_GID_LOCK_FILE "hct_gid_locks" ++ ++#define HCT_QEMU_GIDS_BITMAP_MAX_BIT 2048 ++#define HCT_QEMU_GIDS_BITMAP_MIN_BIT 1024 ++#define HCT_QEMU_GIDS_SHIFT_BITS 18 ++#define HCT_GIDS_PER_BLOCK 8 ++ ++static void hct_clear_bit(unsigned long *bitmap, int n); ++static uint32_t hct_get_bit(unsigned long *bitmap, int n); ++ ++/** ++ * File-based bitmap structure for multi-process shared g_id allocation ++ */ ++struct hct_gid_bitmap { ++ char name[MAX_PATH]; ++ unsigned int len; ++ unsigned long *bitmap; ++ int shm_fd; ++ int lock_fd; ++}; ++ ++/// Global bitmap instance for g_id management ++struct hct_gid_bitmap *g_hct_gid_bitmap; ++ ++/** ++ * @brief Allocate a new hct_gid_bitmap structure with shared memory storage ++ * @details Creates shared memory if not exists, or opens existing one ++ * @return pointer to allocated hct_gid_bitmap on success, NULL on failure ++ */ ++static struct hct_gid_bitmap* hct_gid_bitmap_alloc(void); ++ ++/** ++ * @brief Free hct_gid_bitmap structure and cleanup resources ++ * @param bitmap pointer to hct_gid_bitmap to free ++ */ ++static void hct_gid_bitmap_free(struct hct_gid_bitmap *bitmap); ++ ++/** ++ * @brief Allocate a g_id from the 1024-bit bitmap, left-shifted by HCT_QEMU_GIDS_SHIFT_BITS (18) ++ * @param bitmap pointer to hct_gid_bitmap structure ++ * @param gid pointer to store allocated g_id ++ * @return 0 on 
success, -EINVAL on failure ++ */ ++static int hct_g_ids_alloc(struct hct_gid_bitmap *bitmap, unsigned long *gid); ++ ++/** ++ * @brief Free a g_id and clear corresponding bit in bitmap ++ * @param bitmap pointer to hct_gid_bitmap structure ++ * @param gid g_id to free ++ */ ++static void hct_g_ids_free(struct hct_gid_bitmap *bitmap, unsigned long gid); ++ ++/** ++ * @brief Check if g_id is locked by trying to acquire exclusive lock on its file region ++ * @param bitmap pointer to hct_gid_bitmap structure ++ * @param gid g_id to check lock status ++ * @return 0 if can lock (process dead), -EINVAL if cannot lock (process alive) ++ */ ++static int hct_g_ids_lock_state_lock(struct hct_gid_bitmap *bitmap, unsigned long gid); ++ ++/** ++ * @brief Walk through bitmap and check for abnormally exited processes ++ * @details Clean up orphaned g_ids by checking file locks ++ * @param bitmap pointer to hct_gid_bitmap structure ++ */ ++static void hct_g_ids_lock_state_walk(struct hct_gid_bitmap *bitmap); ++ ++// ======================== g_id API end ==================== ++ ++ ++// ======================== HCT IPC API ==================== ++ ++// HCT IPC TLV field type definitions ++#define HCT_IPC_FIELD_COMMAND 1 /* command */ ++#define HCT_IPC_FIELD_CONTAINER_FD 2 /* container fd */ ++#define HCT_IPC_FIELD_GROUP_FD 3 /* group fd */ ++#define HCT_IPC_FIELD_DEVICE_FD 4 /* device fd */ ++#define HCT_IPC_FIELD_GROUP_INFO 5 /* group info */ ++#define HCT_IPC_FIELD_DEVICE_INFO 6 /* device info */ ++#define HCT_IPC_FIELD_DEVICE_NAMES 7 /* device names */ ++#define HCT_IPC_FIELD_VCCP_PATH 9 /* vccp device file path */ ++#define HCT_IPC_FIELD_VCCP_CONTENT 10 /* vccp device file content */ ++#define HCT_IPC_FIELD_ERROR_REASON 11 /* request failed reason */ ++ ++// Error code definitions ++#define HCT_SUCCESS 0 /* success */ ++#define HCT_ERROR_CONNECT (-1) /* connect error */ ++#define HCT_ERROR_RECEIVE (-2) /* receive error */ ++#define HCT_ERROR_INVALID_DATA (-3) /* invalid data */ 
++ ++// Constants ++#define HCT_DAEMON_PID_FILE "/var/run/hctd.pid" /* daemon pid file */ ++#define HCT_DAEMON_SOCK_PATH "/var/run/hctd.sock" /* daemon socket path */ ++ ++#define PCI_ADDR_MAX 20 /* pci address max length */ ++#define DEVICE_NAME_MAX 32 /* device name max length */ ++ ++// daemon client command type ++enum hct_daemon_req_cmd { ++ HCT_CMD_GET_ALL_DEVICES = 0x01, /* libhct request: get all devices information */ ++ HCT_CMD_GET_DEVICE_BY_NAME = 0x02, /* qemu request: get device info via vccp file */ ++}; ++ ++typedef struct hct_vccp_req { ++ const char *path; ++ const char *content; ++} hct_vccp_req; ++ ++// TLV structure ++typedef struct { ++ uint16_t type; ++ uint16_t length; ++ void *value; ++} hct_tlv_t; ++ ++// CCP device management structure ++typedef struct { ++ char pci_addr[PCI_ADDR_MAX]; /* PCI address (e.g., 0000:01:00.0) */ ++ int group_id; /* VFIO group ID */ ++ int group_fd; /* VFIO group FD */ ++ int device_fd; /* VFIO device FD */ ++ int group_index; /* Index in group array of the group this device belongs to */ ++} hct_ccp_device_t; ++ ++// Group information structure ++typedef struct { ++ int group_id; ++ int group_fd; ++ int device_count; /* Number of devices in this group */ ++} hct_group_info_t; ++ ++// Overall client device information container ++typedef struct { ++ int container_fd; /* VFIO container file descriptor */ ++ hct_group_info_t *groups; /* VFIO group information array */ ++ int group_count; /* Number of groups */ ++ hct_ccp_device_t *devices; /* VFIO device information array */ ++ int device_count; /* Number of devices */ ++} hct_client_info_t; ++ ++// Internal constants for client implementation ++#define MAX_TLV_BUFFER_SIZE 2048 /* max tlv buffer size */ ++#define MAX_FD_COUNT 128 /* max fd count */ ++ ++// ======================== HCT IPC API end ==================== ++ ++ + #define MAX_CCP_CNT 48 + #define DEF_CCP_CNT_MAX 16 + #define MAX_HW_QUEUES 5 +@@ -47,6 +226,10 @@ + #define VFIO_DEVICE_CCP_SET_MODE 
_IO(VFIO_TYPE, VFIO_BASE + 32) + #define VFIO_DEVICE_CCP_GET_MODE _IO(VFIO_TYPE, VFIO_BASE + 33) + ++#define SHM_DIR "/dev/shm/" ++#define HCT_GLOBAL_SHARE_SHM_NAME "hct_global_share" ++#define HCT_GLOBAL_SHARE_SHM_PATH SHM_DIR HCT_GLOBAL_SHARE_SHM_NAME ++ + #define HCT_SHARE_DEV "/dev/hct_share" + #define CCP_SHARE_DEV "/dev/ccp_share" + #define PCI_DRV_HCT_DIR "/sys/bus/pci/drivers/hct" +@@ -80,6 +263,8 @@ + static volatile struct hct_data { + int init; + int hct_fd; ++ int hct_shm_fd; ++ int vfio_container_fd; + unsigned long pasid; + unsigned long hct_shared_size; + uint8_t *pasid_memory; +@@ -104,6 +289,10 @@ typedef struct HctDevState { + uint64_t map_size[PCI_NUM_REGIONS]; + void *maps[PCI_NUM_REGIONS]; + char *ccp_dev_path; ++ int container_fd; /* vfio container fd */ ++ int group_fd; /* vfio group fd */ ++ int group_id; /* vfio group id */ ++ int lock_fd; /* vccp flock fd for this device only */ + } HCTDevState; + + struct hct_dev_ctrl { +@@ -136,8 +325,37 @@ enum hct_ccp_driver_mode_type { + HCT_CCP_DRV_MOD_UNINIT = 0, + HCT_CCP_DRV_MOD_HCT, + HCT_CCP_DRV_MOD_CCP, ++ HCT_CCP_DRV_MOD_VFIO_PCI, + }; + ++ ++/* @brief vfio-pci mapping function */ ++static int vfio_hct_dma_map_vfio_pci(int container_fd, void *vaddr, uint64_t iova, uint64_t size); ++ ++/* @brief vfio-pci unmapping function */ ++static int vfio_hct_dma_unmap_vfio_pci(int container_fd, uint64_t iova, uint64_t size); ++ ++/* @brief vfio-pci init from daemon function */ ++static int vfio_hct_init_from_daemon(HCTDevState *state); ++ ++/* @brief hct data uninit function */ ++static void hct_data_uninit(HCTDevState *state); ++ ++/* @brief hct get error string function */ ++static const char *hct_get_error_string(int error_code); ++ ++/* @brief hct find device by pci addr function */ ++static hct_ccp_device_t* hct_find_device_by_pci_addr(hct_client_info_t *client_info, const char *pci_addr); ++ ++/* @brief hct client cleanup function */ ++static void hct_client_cleanup(hct_client_info_t *client_info); 
++ ++/* @brief hct client send cmd function */ ++static int hct_client_send_cmd(const char *socket_path, hct_client_info_t *client_info, enum hct_daemon_req_cmd cmd, void *req_data); ++ ++/* @brief hct create user shared memory function */ ++static int hct_create_user_shared_memory(const char *name, size_t size); ++ + static int hct_get_sysfs_value(const char *path, int *val) + { + FILE *fp = NULL; +@@ -174,32 +392,27 @@ static int hct_get_sysfs_value(const char *path, int *val) + static int pasid_get_and_init(HCTDevState *state) + { + void *base = (void *)hct_data.pasid_memory; +- struct hct_dev_ctrl ctrl; + unsigned long *gid = NULL; + int ret; + +- ctrl.op = HCT_SHARE_OP_GET_PASID; +- ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); +- if (ret < 0) { +- ret = -errno; +- error_report("get pasid fail, errno: %d.", errno); ++ g_hct_gid_bitmap = hct_gid_bitmap_alloc(); ++ if (!g_hct_gid_bitmap) { ++ error_report("Failed to allocate hct_gid_bitmap"); + goto out; + } + +- hct_data.pasid = (unsigned long)ctrl.pasid; +- *(unsigned long *)base = (unsigned long)ctrl.pasid; +- +- ctrl.op = HCT_SHARE_OP_GET_ID; +- ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ gid = (unsigned long *)((unsigned long)base + HCT_PASID_MEM_GID_OFFSET); ++ ret = hct_g_ids_alloc(g_hct_gid_bitmap, &g_id); ++ *gid = g_id; + if (ret < 0) { +- ret = -errno; +- error_report("get gid fail, errno: %d", errno); ++ error_report("Failed to allocate g_id, ret=%d", ret); + goto out; ++ } else { ++ hct_data.pasid = *gid >> HCT_QEMU_GIDS_SHIFT_BITS; ++ *(unsigned long *)base = (unsigned long)hct_data.pasid; ++ return 0; + } + +- gid = (unsigned long *)((unsigned long)base + HCT_PASID_MEM_GID_OFFSET); +- *(unsigned long *)gid = (unsigned long)ctrl.id; +- + out: + return ret; + } +@@ -229,15 +442,34 @@ static void vfio_hct_exit(PCIDevice *dev) + qemu_close(hct_data.hct_fd); + hct_data.hct_fd = 0; + } ++ + if (state->vdev.fd) { + qemu_close(state->vdev.fd); + state->vdev.fd = 0; + } ++ ++ /* Release vccp 
file lock */ ++ if (state->lock_fd >= 0) { ++ flock(state->lock_fd, LOCK_UN); ++ close(state->lock_fd); ++ state->lock_fd = -1; ++ } ++ ++ if (hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI) { ++ state->container_fd = -1; ++ state->group_fd = -1; ++ } ++ ++ hct_data.ccp_cnt--; ++ ++ if (hct_data.ccp_cnt == 0) { ++ hct_data_uninit(state); ++ } + } + + static Property vfio_hct_properties[] = { + DEFINE_PROP_STRING("sysfsdev", HCTDevState, vdev.sysfsdev), +- DEFINE_PROP_STRING("path", HCTDevState, ccp_dev_path), ++ DEFINE_PROP_STRING("dev", HCTDevState, ccp_dev_path), + DEFINE_PROP_END_OF_LIST(), + }; + +@@ -360,34 +592,19 @@ static int hct_check_duplicated_index(int index) + return 0; + } + +-static int hct_ccp_dev_get_index(HCTDevState *state) ++static int hct_ccp_set_mode(HCTDevState *state) + { + char fpath[PATH_MAX] = {0}; +- char *ptr = NULL; + uint32_t loops= 0; + uint32_t max_loops = 10000; +- int ccp_idx; + int fd; + int ret; + +- if (!state->ccp_dev_path) { +- error_report("state->ccp_dev_path is NULL."); +- return -1; +- } +- +- ptr = strstr(state->ccp_dev_path, "ccp"); +- if (!ptr) +- return -1; +- +- ccp_idx = atoi(ptr + strlen("ccp")); +- if (hct_check_duplicated_index(ccp_idx)) +- return -1; +- +- fd = qemu_open_old(state->ccp_dev_path, O_RDWR); +- if (fd < 0) { +- error_report("fail to open %s, errno %d.", fpath, errno); ++ if (state->vdev.fd <= 0) { ++ error_report("fail to get device fd %d.", state->vdev.fd); + return -1; + } ++ fd = state->vdev.fd; + + while ((ret = ioctl(fd, VFIO_DEVICE_CCP_SET_MODE, _USER_SPACE_USED)) < 0 + && errno == EAGAIN) { +@@ -403,47 +620,76 @@ static int hct_ccp_dev_get_index(HCTDevState *state) + return -1; + } + +- state->vdev.fd = fd; +- state->sdev.shared_memory_offset = ccp_idx; + return 0; + } + + static int hct_get_ccp_index(HCTDevState *state) + { + char path[PATH_MAX] = {0}; +- int mdev_used, index; ++ char vccp_content[256] = {0}; ++ FILE *fp = NULL; ++ int mdev_used = 0, index = 0; + + if (hct_data.driver == 
HCT_CCP_DRV_MOD_CCP) +- return hct_ccp_dev_get_index(state); ++ if(hct_ccp_set_mode(state)) { ++ return -1; ++ } + +- if (!state->vdev.sysfsdev) { +- error_report("state->vdev.sysfsdev is NULL."); +- return -1; +- } ++ if (hct_data.driver == HCT_CCP_DRV_MOD_HCT) { ++ if (!state->vdev.sysfsdev) { ++ error_report("state->vdev.sysfsdev is NULL."); ++ return -1; ++ } ++ ++ if (memcmp((void *)hct_data.hct_version, HCT_VERSION_STR_06, ++ sizeof(HCT_VERSION_STR_06)) >= 0) { ++ snprintf(path, PATH_MAX, "%s/vendor/use", state->vdev.sysfsdev); ++ if (hct_get_sysfs_value(path, &mdev_used)) { ++ error_report("get %s sysfs value fail.\n", path); ++ return -1; ++ } else if (mdev_used != MDEV_USED_FOR_VM) { ++ error_report("The value of file node(%s) is %d, should be MDEV_USED_FOR_VM(%d), pls check.\n", ++ path, mdev_used, MDEV_USED_FOR_VM); ++ return -1; ++ } ++ } + +- if (memcmp((void *)hct_data.hct_version, HCT_VERSION_STR_06, +- sizeof(HCT_VERSION_STR_06)) >= 0) { +- snprintf(path, PATH_MAX, "%s/vendor/use", state->vdev.sysfsdev); +- if (hct_get_sysfs_value(path, &mdev_used)) { ++ snprintf(path, PATH_MAX, "%s/vendor/id", state->vdev.sysfsdev); ++ if (hct_get_sysfs_value(path, &index)) { + error_report("get %s sysfs value fail.\n", path); + return -1; +- } else if (mdev_used != MDEV_USED_FOR_VM) { +- error_report("The value of file node(%s) is %d, should be MDEV_USED_FOR_VM(%d), pls check.\n", +- path, mdev_used, MDEV_USED_FOR_VM); ++ } ++ ++ if (hct_check_duplicated_index(index)) ++ return -1; ++ ++ state->sdev.shared_memory_offset = index; ++ } else if (hct_data.driver == HCT_CCP_DRV_MOD_CCP || hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI) { ++ fp = fopen(state->ccp_dev_path, "r"); ++ if (!fp) { ++ error_report("Failed to open vccp file %s to get index: %s", state->ccp_dev_path, strerror(errno)); + return -1; + } +- } ++ if (fgets(vccp_content, sizeof(vccp_content), fp) == NULL) { ++ error_report("Failed to read content from vccp file %s", state->ccp_dev_path); ++ fclose(fp); 
++ return -1; ++ } ++ fclose(fp); + +- snprintf(path, PATH_MAX, "%s/vendor/id", state->vdev.sysfsdev); +- if (hct_get_sysfs_value(path, &index)) { +- error_report("get %s sysfs value fail.\n", path); +- return -1; ++ if (sscanf(state->ccp_dev_path, "/dev/hct/vccp%*d_%d", &index) != 1) { ++ error_report("Invalid vccp filename format for vfio-pci: %s", state->ccp_dev_path); ++ return -1; ++ } ++ state->sdev.shared_memory_offset = index; ++ if (hct_check_duplicated_index(index)) { ++ return -1; ++ } ++ } else { ++ error_report("Invalid driver mode %d, vccp path %s.\n", hct_data.driver, state->ccp_dev_path); ++ return -1; + } + +- if (hct_check_duplicated_index(index)) +- return -1; +- +- state->sdev.shared_memory_offset = index; + return 0; + } + +@@ -479,7 +725,7 @@ static int hct_shared_memory_init(void) + + hct_data.hct_shared_memory = mmap(NULL, hct_data.hct_shared_size, + PROT_READ | PROT_WRITE, MAP_SHARED, +- hct_data.hct_fd, 0); ++ hct_data.hct_shm_fd, 0); + if (hct_data.hct_shared_memory == MAP_FAILED) { + ret = -errno; + error_report("map hct shared memory fail\n"); +@@ -517,13 +763,24 @@ static void hct_listener_region_add(MemoryListener *listener, + (iova - section->offset_within_address_space); + llsize = int128_sub(llend, int128_make64(iova)); + +- ctrl.op = HCT_SHARE_OP_DMA_MAP; +- ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); +- ctrl.vaddr = (uint64_t)vaddr; +- ctrl.size = llsize; +- ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); +- if (ret < 0) +- error_report("VIFO_MAP_DMA: %d, iova=%lx", -errno, iova); ++ /* according to host running mode to select different DMA mapping mode */ ++ if (hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI) { ++ iova = iova | (hct_data.pasid << PASID_OFFSET); ++ ret = vfio_hct_dma_map_vfio_pci(hct_data.vfio_container_fd, vaddr, iova, llsize); ++ if (ret < 0) { ++ error_report("VFIO_PCI_MAP_DMA: %d, iova=%lx", ret, iova); ++ } ++ } else { ++ /* host running hct/ccp mdev mode: use hct/ccp module mapping */ ++ ctrl.op = 
HCT_SHARE_OP_DMA_MAP; ++ ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); ++ ctrl.vaddr = (uint64_t)vaddr; ++ ctrl.size = llsize; ++ ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ if (ret < 0) { ++ error_report("HCT_MAP_DMA: %d, iova=%lx", -errno, iova); ++ } ++ } + } + + static void hct_listener_region_del(MemoryListener *listener, +@@ -549,13 +806,24 @@ static void hct_listener_region_del(MemoryListener *listener, + + llsize = int128_sub(llend, int128_make64(iova)); + +- ctrl.op = HCT_SHARE_OP_DMA_UNMAP; +- ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); +- ctrl.size = llsize; +- ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); +- if (ret < 0) +- error_report("VIFO_UNMAP_DMA: %d", -errno); +- ++ /* according to host running mode to select different DMA unmapping mode */ ++ if (hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI) { ++ /* use vfio-pci directly unmapping */ ++ iova = iova | ((uint64_t)hct_data.pasid << PASID_OFFSET); ++ ret = vfio_hct_dma_unmap_vfio_pci(hct_data.vfio_container_fd, iova, llsize); ++ if (ret < 0) { ++ error_report("VFIO_PCI_UNMAP_DMA: %d", ret); ++ } ++ } else { ++ /* host running hct/ccp mdev mode: use hct/ccp module unmapping */ ++ ctrl.op = HCT_SHARE_OP_DMA_UNMAP; ++ ctrl.iova = iova | (hct_data.pasid << PASID_OFFSET); ++ ctrl.size = llsize; ++ ret = ioctl(hct_data.hct_fd, HCT_SHARE_OP, &ctrl); ++ if (ret < 0) { ++ error_report("HCT_UNMAP_DMA: %d", -errno); ++ } ++ } + } + + static MemoryListener hct_memory_listener = { +@@ -595,16 +863,26 @@ static void hct_data_uninit(HCTDevState *state) + hct_data.hct_fd = 0; + } + +- if (state->vdev.fd) { +- qemu_close(state->vdev.fd); +- state->vdev.fd = 0; ++ if (hct_data.driver == HCT_CCP_DRV_MOD_HCT) { ++ if (state->vdev.fd) { ++ qemu_close(state->vdev.fd); ++ state->vdev.fd = 0; ++ } + } + ++ if (hct_data.hct_shm_fd) { ++ qemu_close(hct_data.hct_shm_fd); ++ hct_data.hct_shm_fd = 0; ++ } + if (hct_data.pasid) { + hct_data.pasid = 0; + } + + if (hct_data.pasid_memory) { ++ if 
(g_id) { ++ hct_g_ids_free(g_hct_gid_bitmap, g_id); ++ g_id = 0; ++ } + munmap(hct_data.pasid_memory, PAGE_SIZE); + hct_data.pasid_memory = NULL; + } +@@ -615,40 +893,115 @@ static void hct_data_uninit(HCTDevState *state) + } + + memory_listener_unregister(&hct_memory_listener); ++ ++ hct_data.init = 0; ++ hct_data.driver = HCT_CCP_DRV_MOD_UNINIT; + } + + static int hct_data_init(HCTDevState *state) + { + const char *hct_shr_name = NULL; +- int ret; ++ int ret = 0; + + if (hct_data.init == 0) { +- ret = hct_get_used_driver_walk(PCI_DRV_HCT_DIR); +- if (ret == 0) { +- hct_data.driver = HCT_CCP_DRV_MOD_HCT; +- hct_shr_name = HCT_SHARE_DEV; +- } else { +- hct_data.driver = HCT_CCP_DRV_MOD_CCP; +- hct_shr_name = CCP_SHARE_DEV; ++ /* ++ * Check driver type based on parameters. ++ * sysfsdev: mdev mode (hct or ccp) ++ * dev(ccp_dev_path): vfio-pci or ccp mode (via vccp files) ++ */ ++ if (state->ccp_dev_path) { ++ FILE *fp = fopen(state->ccp_dev_path, "r"); ++ if (fp) { ++ int type_char = fgetc(fp); ++ fclose(fp); ++ if (type_char == 'v') { ++ hct_data.driver = HCT_CCP_DRV_MOD_VFIO_PCI; ++ } else if (type_char == 'c') { ++ hct_data.driver = HCT_CCP_DRV_MOD_CCP; ++ } else { ++ error_report("hct: invalid vccp file content in %s", state->ccp_dev_path); ++ return -EINVAL; ++ } ++ } else { ++ error_report("hct: cannot open vccp file %s", state->ccp_dev_path); ++ return -EIO; ++ } ++ } else { ++ /* Default to legacy mdev mode check if no params given */ ++ ret = hct_get_used_driver_walk(PCI_DRV_HCT_DIR); ++ if (ret == 0) { ++ hct_data.driver = HCT_CCP_DRV_MOD_HCT; ++ hct_shr_name = HCT_SHARE_DEV; ++ hct_data.hct_fd = qemu_open_old(hct_shr_name, O_RDWR); ++ if (hct_data.hct_fd < 0) { ++ error_report("fail to open %s, errno %d.", hct_shr_name, errno); ++ goto out; ++ } ++ ++ /* The hct.ko version number needs not to be less than 0.2. 
*/ ++ ret = hct_api_version_check(); ++ if (ret) { ++ goto out; ++ } ++ /* close fd for ioctl in kernel module and open the real share memory file below. */ ++ qemu_close(hct_data.hct_fd); ++ ++ } else { ++ /* This case is now handled by the unified logic below, assuming vccp path */ ++ error_report("hct: sysfsdev is only supported hct driver mode."); ++ } ++ } ++ } ++ if (hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI || hct_data.driver == HCT_CCP_DRV_MOD_CCP) { ++ /* host running vfio-pci/ccp mode: get device information from daemon via vccp file */ ++ ret = vfio_hct_init_from_daemon(state); ++ if (ret < 0) { ++ error_report("Failed to initialize device info %d", ret); ++ return ret; + } +- +- hct_data.hct_fd = qemu_open_old(hct_shr_name, O_RDWR); +- if (hct_data.hct_fd < 0) { +- error_report("fail to open %s, errno %d.", hct_shr_name, errno); +- ret = -errno; +- goto out; ++ } ++ if (hct_data.init == 0) { ++ hct_shr_name = HCT_GLOBAL_SHARE_SHM_PATH; ++ hct_data.hct_shm_fd = qemu_open_old(hct_shr_name, O_RDWR); ++ if (hct_data.hct_shm_fd < 0) { ++ if (errno == 2) { ++ ret = hct_create_user_shared_memory(HCT_GLOBAL_SHARE_SHM_NAME, HCT_SHARED_MEMORY_SIZE); ++ if (!ret) { ++ hct_data.hct_shm_fd = qemu_open_old(hct_shr_name, O_RDWR); ++ if (hct_data.hct_shm_fd < 0) { ++ ret = -errno; ++ } ++ } ++ } else { ++ ret = -errno; ++ } ++ if (ret < 0) { ++ error_report("fail to open %s, errno %d.", hct_shr_name, errno); ++ goto out; ++ } + } +- +- /* The hct.ko version number needs not to be less than 0.2. */ +- ret = hct_api_version_check(); +- if (ret) +- goto out; ++ hct_data.hct_shared_size = HCT_SHARED_MEMORY_SIZE; + + /* assign a page to the virtual BAR3 of each CCP. 
*/ + ret = hct_shared_memory_init(); + if (ret) + goto out; + ++ /* Open fd for ioctl */ ++ if (hct_data.driver == HCT_CCP_DRV_MOD_HCT) { ++ hct_data.hct_fd = qemu_open_old(HCT_SHARE_DEV, O_RDWR); ++ if (hct_data.hct_fd < 0) { ++ error_report("fail to open %s, errno %d.", HCT_SHARE_DEV, errno); ++ goto unmap_shared_memory_exit; ++ } ++ } else if (hct_data.driver == HCT_CCP_DRV_MOD_CCP) { ++ hct_data.hct_fd = qemu_open_old(CCP_SHARE_DEV, O_RDWR); ++ if (hct_data.hct_fd < 0) { ++ error_report("fail to open %s, errno %d.", CCP_SHARE_DEV, errno); ++ goto unmap_shared_memory_exit; ++ } ++ } ++ + hct_data.pasid_memory = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (hct_data.pasid_memory < 0) +@@ -657,7 +1010,7 @@ static int hct_data_init(HCTDevState *state) + /* assign a unique pasid to each virtual machine. */ + ret = pasid_get_and_init(state); + if (ret < 0) +- goto unmap_pasid_memory_exit; ++ goto unmap_pasid_memory_exit; + + /* perform DMA_MAP and DMA_UNMAP operations on all memories of the virtual machine. 
*/ + memory_listener_register(&hct_memory_listener, &address_space_memory); +@@ -677,14 +1030,69 @@ out: + return ret; + } + ++#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) ++ ++/* @brief set bus master to avoid ccp stuck in vfio-pci mode */ ++static int pci_vfio_set_bus_master(int dev_fd) ++{ ++ uint16_t reg = 0; ++ int ret = 0; ++ ++ ret = pread(dev_fd, ®, sizeof(reg), ++ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + ++ PCI_COMMAND); ++ if (ret != sizeof(reg)) { ++ error_report("Cannot read command from PCI config space!\n"); ++ return -1; ++ } ++ ++ reg |= PCI_COMMAND_MASTER; ++ ++ ret = pwrite(dev_fd, ®, sizeof(reg), ++ VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + ++ PCI_COMMAND); ++ ++ if (ret != sizeof(reg)) { ++ error_report("Cannot write command to PCI config space!\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++ + /* When device is loaded */ + static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) + { +- int ret; +- Error *err = NULL; +- VFIOGroup *group = NULL; + HCTDevState *state = PCI_HCT_DEV(pci_dev); ++ VFIOGroup *group = NULL; ++ Error *err = NULL; ++ int ret = 0; ++ ++ /* Initialize lock_fd to -1 */ ++ state->lock_fd = -1; ++ ++ /* ++ * In vfio-pci/ccp mode, lock this device's vccp file to prevent ++ * multiple QEMU instances from using the same vccp group. 
++ */ ++ if (state->ccp_dev_path && !state->vdev.sysfsdev) { ++ state->lock_fd = open(state->ccp_dev_path, O_RDONLY | O_CLOEXEC); ++ if (state->lock_fd < 0) { ++ error_setg(errp, "hct: cannot open vccp file %s: %s", ++ state->ccp_dev_path, strerror(errno)); ++ return; ++ } ++ if (flock(state->lock_fd, LOCK_EX | LOCK_NB) != 0) { ++ error_setg(errp, "hct: cannot lock vccp file %s, another QEMU may be using this vccp group.", ++ state->ccp_dev_path); ++ close(state->lock_fd); ++ state->lock_fd = -1; ++ return; ++ } ++ } + ++ /* use hct_data_init to initialize */ + ret = hct_data_init(state); + if (ret < 0) { + error_setg(errp, "hct data initialization failed."); +@@ -703,6 +1111,10 @@ static void vfio_hct_realize(PCIDevice *pci_dev, Error **errp) + error_setg(errp, "hct get vfio device information failed."); + goto put_group_out; + } ++ ++ /* vfio-pci mode: FD is set in vfio_hct_init_from_daemon */ ++ } else if(hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI) { ++ pci_vfio_set_bus_master(state->vdev.fd); + } + + ret = vfio_hct_region_mmap(state); +@@ -764,3 +1176,955 @@ static void hct_register_types(void) + } + + type_init(hct_register_types); ++ ++/* @brief vfio-pci mode DMA mapping function */ ++static int vfio_hct_dma_map_vfio_pci(int container_fd, void *vaddr, uint64_t iova, uint64_t size) ++{ ++ struct vfio_iommu_type1_dma_map dma_map = { 0 }; ++ int ret = 0; ++ ++ if (container_fd < 0) { ++ error_report("Invalid container fd for vfio-pci mapping"); ++ return -1; ++ } ++ ++ if (!vaddr || !size) { ++ error_report("Invalid parameters for vfio-pci mapping"); ++ return -1; ++ } ++ ++ dma_map.argsz = sizeof(dma_map); ++ dma_map.vaddr = (uint64_t)vaddr; ++ dma_map.size = size; ++ dma_map.iova = (uint64_t)iova; ++ dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ++ ++ ret = ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); ++ if (ret) { ++ if (errno == EEXIST) { ++ error_report("Memory segment is already mapped in vfio-pci mode, container_fd=%d, iova=%lx, 
size=%lx", container_fd, iova, size); ++ ret = 0; ++ } else { ++ error_report("Cannot set up DMA remapping in vfio-pci mode, error %i (%s)", ++ errno, strerror(errno)); ++ } ++ } ++ ++ return ret; ++} ++ ++static int vfio_hct_dma_unmap_vfio_pci(int container_fd, uint64_t iova, uint64_t size) ++{ ++ struct vfio_iommu_type1_dma_unmap dma_unmap = { 0 }; ++ int ret = 0; ++ ++ if (container_fd < 0) { ++ error_report("Invalid container fd for vfio-pci unmapping"); ++ return -1; ++ } ++ ++ if (!iova || !size) { ++ error_report("Invalid parameters for vfio-pci unmapping, iova %lu, size %lu\n", iova, size); ++ return -1; ++ } ++ ++ dma_unmap.argsz = sizeof(dma_unmap); ++ dma_unmap.size = size; ++ dma_unmap.iova = (uint64_t)iova; ++ ++ ret = ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); ++ if (ret < 0) { ++ error_report("Cannot unmap DMA in vfio-pci mode, error %i (%s)", ++ errno, strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++/* @brief get vfio file descriptor from daemon */ ++static int vfio_hct_init_from_daemon(HCTDevState *state) ++{ ++ hct_client_info_t client_info; ++ hct_vccp_req req_data; ++ hct_ccp_device_t *device_info = NULL; ++ hct_group_info_t *group_info = NULL; ++ char vccp_content[256] = {0}; ++ char bdf[PCI_ADDR_MAX] = {0}; ++ FILE *fp = NULL; ++ int ret = 0; ++ char type_char = 'c'; ++ ++ fp = fopen(state->ccp_dev_path, "r"); ++ if (!fp) { ++ error_report("Failed to open vccp file %s: %s", state->ccp_dev_path, strerror(errno)); ++ return -EINVAL; ++ } ++ ++ if (fgets(vccp_content, sizeof(vccp_content), fp) == NULL) { ++ error_report("Failed to read content from vccp file %s", state->ccp_dev_path); ++ fclose(fp); ++ return -EINVAL; ++ } ++ fclose(fp); ++ ++ vccp_content[strcspn(vccp_content, "\n")] = '\0'; ++ ++ if (sscanf(vccp_content, "%c", &type_char) != 1) { ++ error_report("Invalid vccp file format %s", state->ccp_dev_path); ++ return -EINVAL; ++ } ++ ++ if (type_char == 'v') { ++ if (sscanf(vccp_content, "v %*d %*d %15s", bdf) != 1) { ++ 
error_report("Invalid vfio-pci vccp file %s", state->ccp_dev_path); ++ return -EINVAL; ++ } ++ } ++ ++ req_data.path = state->ccp_dev_path; ++ req_data.content = vccp_content; ++ ret = hct_client_send_cmd(HCT_DAEMON_SOCK_PATH, &client_info, HCT_CMD_GET_DEVICE_BY_NAME, &req_data); ++ if (ret != HCT_SUCCESS) { ++ error_report("Failed to send command: %s", hct_get_error_string(ret)); ++ return ret; ++ } ++ ++ /* find specified CCP device */ ++ if (hct_data.driver == HCT_CCP_DRV_MOD_CCP) { ++ if (client_info.device_count != 1 || !client_info.devices) { ++ error_report("CCP mode: Expected 1 device, but received %d", ++ client_info.device_count); ++ hct_client_cleanup(&client_info); ++ return -ENODEV; ++ } ++ device_info = &client_info.devices[0]; ++ ++ state->vdev.fd = device_info->device_fd; ++ } else { /* VFIO-PCI mode */ ++ device_info = hct_find_device_by_pci_addr(&client_info, bdf); ++ if (!device_info) { ++ error_report("Device %s not found", bdf); ++ hct_client_cleanup(&client_info); ++ return -ENODEV; ++ } ++ /* get corresponding group information */ ++ group_info = &client_info.groups[device_info->group_index]; ++ ++ /* use returned file descriptor */ ++ state->container_fd = client_info.container_fd; ++ state->group_fd = group_info->group_fd; ++ state->vdev.fd = device_info->device_fd; ++ state->group_id = group_info->group_id; ++ hct_data.vfio_container_fd = state->container_fd; ++ } ++ ++ ++ /* note: do not call hct_client_cleanup, because we need to keep FD open */ ++ /* only clean up dynamic allocated memory, do not close FD */ ++ if (client_info.groups) { ++ free(client_info.groups); ++ } ++ if (client_info.devices) { ++ free(client_info.devices); ++ } ++ ++ return 0; ++} ++ ++/** ++ * @brief Parse single TLV record ++ * @param buffer The buffer to parse ++ * @param buffer_len The length of the buffer ++ * @param offset The offset to parse ++ * @param tlv The TLV to parse ++ * @return 0 on success, -1 on failure ++ */ ++static int hct_parse_tlv(const char 
*buffer, size_t buffer_len, size_t *offset, hct_tlv_t *tlv) ++{ ++ if (*offset + sizeof(uint16_t) * 2 > buffer_len) { ++ return -1; ++ } ++ ++ memcpy(&tlv->type, buffer + *offset, sizeof(uint16_t)); ++ *offset += sizeof(uint16_t); ++ ++ memcpy(&tlv->length, buffer + *offset, sizeof(uint16_t)); ++ *offset += sizeof(uint16_t); ++ ++ if (*offset + tlv->length > buffer_len) { ++ return -1; ++ } ++ ++ if (tlv->length > 0) { ++ tlv->value = malloc(tlv->length); ++ if (!tlv->value) { ++ return -1; ++ } ++ memcpy(tlv->value, buffer + *offset, tlv->length); ++ *offset += tlv->length; ++ } else { ++ tlv->value = NULL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * @brief Add a TLV to the buffer ++ * @param buffer The buffer to add the TLV to ++ * @param current_len The current length of the buffer ++ * @param max_len The maximum length of the buffer ++ * @param type The type of the TLV ++ * @param value The value of the TLV ++ * @param length The length of the TLV ++ * @return 0 on success, -1 on failure ++ */ ++static int hct_add_tlv_to_buffer(char *buffer, size_t *current_len, size_t max_len, uint16_t type, const void *value, uint16_t length) ++{ ++ if (*current_len + sizeof(type) + sizeof(length) + length > max_len) { ++ error_report("Buffer overflow in TLV add\n"); ++ return -1; ++ } ++ ++ memcpy(buffer + *current_len, &type, sizeof(type)); ++ *current_len += sizeof(type); ++ ++ memcpy(buffer + *current_len, &length, sizeof(length)); ++ *current_len += sizeof(length); ++ ++ if (length > 0 && value) { ++ memcpy(buffer + *current_len, value, length); ++ *current_len += length; ++ } ++ ++ return 0; ++} ++ ++/** ++ * @brief Send command to daemon ++ * @param sock The socket to send to ++ * @param cmd The command to send ++ * @param device_names Array of device names (for HCT_CMD_GET_DEVICE_BY_NAME) ++ * @param device_count Number of device names ++ * @return 0 on success, -1 on failure ++ */ ++static int hct_send_command(int sock, enum hct_daemon_req_cmd cmd, void *req_data) ++{ ++ 
char buffer[2048] = {0}; ++ size_t buffer_len = 0; ++ struct hct_vccp_req *vccp_req = NULL; ++ ++ /* add command TLV */ ++ if (hct_add_tlv_to_buffer(buffer, &buffer_len, sizeof(buffer), ++ HCT_IPC_FIELD_COMMAND, &cmd, sizeof(cmd)) < 0) { ++ error_report("Failed to add command TLV\n"); ++ return -1; ++ } ++ ++ /* add device names TLV if present */ ++ if (cmd == HCT_CMD_GET_DEVICE_BY_NAME) { ++ vccp_req = req_data; ++ if (hct_add_tlv_to_buffer(buffer, &buffer_len, sizeof(buffer), ++ HCT_IPC_FIELD_VCCP_PATH, vccp_req->path, ++ strlen(vccp_req->path) + 1) < 0) { ++ error_report("Failed to add vccp path TLV\n"); ++ return -1; ++ } ++ if (hct_add_tlv_to_buffer(buffer, &buffer_len, sizeof(buffer), ++ HCT_IPC_FIELD_VCCP_CONTENT, vccp_req->content, ++ strlen(vccp_req->content) + 1) < 0) { ++ error_report("Failed to add vccp content TLV\n"); ++ return -1; ++ } ++ } ++ ++ /* send command */ ++ if (send(sock, buffer, buffer_len, 0) < 0) { ++ error_report("Failed to send command: %s\n", strerror(errno)); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/** ++ * @brief Send command request to HCT daemon and get response ++ * @param socket_path The path to the socket ++ * @param client_info The client information ++ * @param cmd The command to send ++ * @param device_names Array of device names (for HCT_CMD_GET_DEVICE_BY_NAME) ++ * @param device_count Number of device names ++ * @return 0 on success, -1 on failure ++ */ ++static int hct_client_send_cmd(const char *socket_path, hct_client_info_t *client_info, ++ enum hct_daemon_req_cmd cmd, void *req_data) ++{ ++ char cmsgbuf[CMSG_SPACE(sizeof(int) * MAX_FD_COUNT)] = {0}; ++ char buffer[MAX_TLV_BUFFER_SIZE]; ++ char error_reason[256] = {0}; ++ int fds[MAX_FD_COUNT] = {0}; ++ struct sockaddr_un addr; ++ struct msghdr msg; ++ struct iovec iov; ++ hct_tlv_t tlv; ++ struct cmsghdr *cmsg = NULL; ++ int *fdptr = NULL; ++ ssize_t received = 0; ++ size_t offset = 0; ++ size_t fd_size = 0; ++ int current_group_index = 0; ++ int fd_index = 0; ++ 
int sock = 0; ++ int has_pending_group_info = 0; ++ int has_pending_device_info = 0; ++ int device_index = 0; ++ ++ /* Temporary variables to hold info before FD */ ++ struct { ++ int group_id; ++ int device_count; ++ } pending_group_info = {0}; ++ struct { ++ char pci_addr[16]; ++ int group_id; ++ } pending_device_info = {0}; ++ ++ ++ if (!socket_path || !client_info) { ++ return HCT_ERROR_INVALID_DATA; ++ } ++ ++ memset(client_info, 0, sizeof(hct_client_info_t)); ++ sock = socket(AF_UNIX, SOCK_STREAM, 0); ++ if (sock < 0) { ++ error_report("Failed to create socket: %s\n", strerror(errno)); ++ return HCT_ERROR_CONNECT; ++ } ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sun_family = AF_UNIX; ++ strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1); ++ ++ if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ error_report("Failed to connect to %s: %s\n", socket_path, strerror(errno)); ++ close(sock); ++ return HCT_ERROR_CONNECT; ++ } ++ ++ if (hct_send_command(sock, cmd, req_data) < 0) { ++ error_report("[HCT] Failed to send command"); ++ close(sock); ++ return HCT_ERROR_CONNECT; ++ } ++ ++ memset(&msg, 0, sizeof(msg)); ++ iov.iov_base = buffer; ++ iov.iov_len = sizeof(buffer); ++ ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ msg.msg_control = cmsgbuf; ++ msg.msg_controllen = sizeof(cmsgbuf); ++ ++ received = recvmsg(sock, &msg, 0); ++ if (received <= 0) { ++ error_report("Failed to receive message: %s\n", strerror(errno)); ++ close(sock); ++ return HCT_ERROR_RECEIVE; ++ } ++ ++ /* Extract file descriptors from control message */ ++ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { ++ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { ++ fd_size = cmsg->cmsg_len - CMSG_LEN(0); ++ fdptr = (int *)CMSG_DATA(cmsg); ++ memcpy(fds, fdptr, fd_size); ++ } ++ } ++ ++ offset = 0; ++ fd_index = 0; ++ current_group_index = -1; ++ ++ while (offset < received) { ++ memset(&tlv, 0, sizeof(tlv)); ++ if 
(hct_parse_tlv(buffer, received, &offset, &tlv) < 0) { ++ error_report("[HCT] Failed to parse TLV data"); ++ hct_client_cleanup(client_info); ++ close(sock); ++ return HCT_ERROR_INVALID_DATA; ++ } ++ ++ /* Process different TLV types according to order */ ++ switch (tlv.type) { ++ case HCT_IPC_FIELD_CONTAINER_FD: ++ client_info->container_fd = fds[fd_index]; ++ fd_index++; ++ break; ++ ++ case HCT_IPC_FIELD_GROUP_INFO: ++ /* Store group info for next group FD */ ++ if (tlv.length == sizeof(pending_group_info)) { ++ memcpy(&pending_group_info, tlv.value, sizeof(pending_group_info)); ++ has_pending_group_info = 1; ++ } else { ++ error_report("Invalid group info size\n"); ++ } ++ break; ++ ++ case HCT_IPC_FIELD_GROUP_FD: ++ if (!has_pending_group_info) { ++ error_report("Received group FD without group info"); ++ break; ++ } ++ ++ /* New group detected, allocate space */ ++ current_group_index++; ++ client_info->group_count = current_group_index + 1; ++ ++ /* Reallocate groups array */ ++ client_info->groups = realloc(client_info->groups, sizeof(hct_group_info_t) * client_info->group_count); ++ if (!client_info->groups) { ++ error_report("Failed to allocate memory for groups"); ++ hct_client_cleanup(client_info); ++ close(sock); ++ return HCT_ERROR_INVALID_DATA; ++ } ++ ++ /* Initialize new group with real group_id */ ++ memset(&client_info->groups[current_group_index], 0, sizeof(hct_group_info_t)); ++ client_info->groups[current_group_index].group_id = pending_group_info.group_id; ++ client_info->groups[current_group_index].group_fd = fds[fd_index]; ++ client_info->groups[current_group_index].device_count = 0; ++ ++ fd_index++; ++ has_pending_group_info = 0; /* Clear pending info */ ++ break; ++ ++ case HCT_IPC_FIELD_DEVICE_INFO: ++ /* Store device info for next device FD */ ++ if (tlv.length == sizeof(pending_device_info)) { ++ memcpy(&pending_device_info, tlv.value, sizeof(pending_device_info)); ++ has_pending_device_info = 1; ++ } else { ++ error_report("Invalid 
device info size\n"); ++ } ++ break; ++ ++ case HCT_IPC_FIELD_DEVICE_FD: ++ /* In CCP mode, we only receive DEVICE_FD, no preceding INFO TLVs */ ++ if (hct_data.driver == HCT_CCP_DRV_MOD_VFIO_PCI) { ++ if (!has_pending_device_info) { ++ error_report("VFIO mode: Received device FD without device info"); ++ break; ++ } ++ if (current_group_index < 0) { ++ error_report("Received device FD without group context"); ++ break; ++ } ++ } ++ ++ /* Allocate space for new device */ ++ client_info->device_count++; ++ client_info->devices = realloc(client_info->devices, sizeof(hct_ccp_device_t) * client_info->device_count); ++ if (!client_info->devices) { ++ error_report("Failed to allocate memory for devices"); ++ hct_client_cleanup(client_info); ++ close(sock); ++ return HCT_ERROR_INVALID_DATA; ++ } ++ ++ /* Initialize new device with real pci_addr */ ++ device_index = client_info->device_count - 1; ++ memset(&client_info->devices[device_index], 0, sizeof(hct_ccp_device_t)); ++ client_info->devices[device_index].device_fd = fds[fd_index]; ++ ++ if (hct_data.driver != HCT_CCP_DRV_MOD_CCP) { ++ client_info->devices[device_index].group_index = current_group_index; ++ ++ /* Use real pci_addr from device info */ ++ strncpy(client_info->devices[device_index].pci_addr, ++ pending_device_info.pci_addr, ++ sizeof(client_info->devices[device_index].pci_addr) - 1); ++ client_info->devices[device_index].pci_addr[ ++ sizeof(client_info->devices[device_index].pci_addr) - 1] = '\0'; ++ ++ /* Update group device count */ ++ client_info->groups[current_group_index].device_count++; ++ } ++ ++ fd_index++; ++ has_pending_device_info = 0; /* Clear pending info */ ++ break; ++ case HCT_IPC_FIELD_ERROR_REASON: ++ error_report("IPC return error"); ++ break; ++ ++ default: ++ error_report("Unknown TLV type: %d", tlv.type); ++ break; ++ } ++ ++ if (tlv.value) { ++ free(tlv.value); ++ } ++ } ++ ++ if (error_reason[0] != '\0') { ++ error_report("Rejected request"); ++ hct_client_cleanup(client_info); ++ 
close(sock); ++ return HCT_ERROR_INVALID_DATA; ++ } ++ ++ close(sock); ++ ++ return HCT_SUCCESS; ++} ++ ++/** ++ * @brief Cleanup HCT client resources ++ * @param client_info The client information ++ */ ++static void hct_client_cleanup(hct_client_info_t *client_info) ++{ ++ if (!client_info) { ++ return; ++ } ++ ++ if (client_info->devices) { ++ free(client_info->devices); ++ client_info->devices = NULL; ++ } ++ ++ if (client_info->groups) { ++ free(client_info->groups); ++ client_info->groups = NULL; ++ } ++ ++ if (client_info->container_fd >= 0) { ++ client_info->container_fd = -1; ++ } ++ ++ client_info->device_count = 0; ++ client_info->group_count = 0; ++} ++ ++/** ++ * @brief Get error description string ++ * @param error_code The error code ++ * @return The error description string ++ */ ++static const char *hct_get_error_string(int error_code) ++{ ++ switch (error_code) { ++ case HCT_SUCCESS: ++ return "Success"; ++ case HCT_ERROR_CONNECT: ++ return "Failed to connect"; ++ case HCT_ERROR_RECEIVE: ++ return "Failed to receive data"; ++ case HCT_ERROR_INVALID_DATA: ++ return "Invalid data received"; ++ default: ++ return "Unknown error"; ++ } ++} ++ ++/** ++ * @brief Find device by PCI address ++ * @param client_info The client information ++ * @param pci_addr The PCI address of the device ++ * @return The device information ++ */ ++static hct_ccp_device_t* hct_find_device_by_pci_addr(hct_client_info_t *client_info, const char *pci_addr) ++{ ++ int i = 0; ++ ++ if (!client_info || !pci_addr || !client_info->devices) { ++ return NULL; ++ } ++ ++ for (i = 0; i < client_info->device_count; i++) { ++ if (strcmp(client_info->devices[i].pci_addr, pci_addr) == 0) { ++ return &client_info->devices[i]; ++ } ++ } ++ ++ return NULL; ++} ++ ++/** ++ * @brief Allocate a global bitmap instance for g_id management ++ * @return The allocated bitmap instance, or NULL on failure ++ */ ++static struct hct_gid_bitmap* hct_gid_bitmap_alloc(void) ++{ ++ struct hct_gid_bitmap 
*gid_bitmap = NULL; ++ mode_t oldmod = 0; ++ size_t total_size = 0; ++ int shm_fd = -1; ++ int lock_fd = -1; ++ ++ gid_bitmap = (struct hct_gid_bitmap *)calloc(1, sizeof(struct hct_gid_bitmap)); ++ if (!gid_bitmap) { ++ error_report("Failed to allocate hct_gid_bitmap structure\n"); ++ return NULL; ++ } ++ ++ strncpy(gid_bitmap->name, HCT_GID_BITMAP_SHM_NAME, MAX_PATH - 1); ++ gid_bitmap->name[MAX_PATH - 1] = '\0'; ++ gid_bitmap->shm_fd = -1; ++ gid_bitmap->lock_fd = -1; ++ ++ total_size = HCT_BITMAP_SIZE(HCT_QEMU_GIDS_BITMAP_MAX_BIT) * sizeof(unsigned long); ++ gid_bitmap->len = total_size; ++ ++ oldmod = umask(0); ++ shm_fd = shm_open(HCT_GID_BITMAP_SHM_NAME, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0666); ++ umask(oldmod); ++ ++ if (shm_fd < 0 && errno == EEXIST) { ++ // Shared memory already exists, try to open it ++ shm_fd = shm_open(HCT_GID_BITMAP_SHM_NAME, O_RDWR | O_CLOEXEC, 0666); ++ } ++ ++ if (shm_fd < 0) { ++ error_report("Failed to create/open shared memory %s, errno %d\n", HCT_GID_BITMAP_SHM_NAME, errno); ++ goto cleanup; ++ } ++ ++ gid_bitmap->shm_fd = shm_fd; ++ if (ftruncate(shm_fd, total_size) != 0) { ++ error_report("Failed to set shared memory size, errno %d\n", errno); ++ goto cleanup; ++ } ++ ++ gid_bitmap->bitmap = (unsigned long *)mmap(NULL, total_size, PROT_READ | PROT_WRITE, ++ MAP_SHARED, shm_fd, 0); ++ if (gid_bitmap->bitmap == MAP_FAILED) { ++ error_report("Failed to mmap shared memory, errno %d\n", errno); ++ goto cleanup; ++ } ++ ++ lock_fd = shm_open(HCT_GID_LOCK_FILE, O_CREAT | O_EXCL | O_RDWR, 0666); ++ if (lock_fd < 0) { ++ if (errno == EEXIST) { ++ lock_fd = shm_open(HCT_GID_LOCK_FILE, O_RDWR, 0); ++ if (lock_fd == -1) { ++ error_report("Failed to shm_open lock file %s, errno %d\n", HCT_GID_LOCK_FILE, errno); ++ goto cleanup; ++ } ++ } else { ++ error_report("Failed to shm_open lock file %s, errno %d\n", HCT_GID_LOCK_FILE, errno); ++ goto cleanup; ++ } ++ } else { ++ if (ftruncate(lock_fd, HCT_QEMU_GIDS_BITMAP_MAX_BIT * 8) != 0) { 
++ error_report("Failed to ftruncate lock shm %s, errno %d\n", HCT_GID_LOCK_FILE, errno); ++ goto cleanup; ++ } ++ if (fchmod(lock_fd, 0666) == -1) { ++ error_report("fchmod failed\n"); ++ } ++ } ++ ++ gid_bitmap->lock_fd = lock_fd; ++ g_hct_gid_bitmap = gid_bitmap; ++ ++ return gid_bitmap; ++ ++cleanup: ++ hct_gid_bitmap_free(gid_bitmap); ++ return NULL; ++} ++ ++/** ++ * @brief Free a global bitmap instance for g_id management ++ * @param bitmap The bitmap instance to free ++ */ ++static void hct_gid_bitmap_free(struct hct_gid_bitmap *bitmap) ++{ ++ if (!bitmap) ++ return; ++ ++ if (bitmap->bitmap && bitmap->bitmap != MAP_FAILED) { ++ munmap(bitmap->bitmap, bitmap->len); ++ } ++ ++ if (bitmap->shm_fd >= 0) { ++ close(bitmap->shm_fd); ++ } ++ ++ if (bitmap->lock_fd >= 0) { ++ close(bitmap->lock_fd); ++ } ++ ++ if (g_hct_gid_bitmap == bitmap) { ++ g_hct_gid_bitmap = NULL; ++ } ++ ++ free(bitmap); ++} ++ ++/** ++ * @brief Allocate a g_id from the 1024-bit bitmap, left-shift by 8 bits ++ * @param bitmap The bitmap instance to allocate from ++ * @return The allocated g_id, or -1 if no g_id is available ++ */ ++static inline int _hct_gid_bitmap_try_alloc(struct hct_gid_bitmap *bitmap) { ++ unsigned long bit_pos = 0; ++ unsigned long old_val = 0, new_val = 0; ++ volatile unsigned long *word_ptr = NULL; ++ unsigned long mask = 0; ++ ++ for (bit_pos = HCT_QEMU_GIDS_BITMAP_MIN_BIT; bit_pos < HCT_QEMU_GIDS_BITMAP_MAX_BIT; bit_pos++) { ++ if (!hct_get_bit(bitmap->bitmap, bit_pos)) { ++ word_ptr = &bitmap->bitmap[WORD_OFFSET(bit_pos)]; ++ mask = 1UL << BIT_OFFSET(bit_pos); ++ old_val = *word_ptr; ++ if (!(old_val & mask)) { ++ new_val = old_val | mask; ++ if (__atomic_compare_exchange_n(word_ptr, &old_val, new_val, ++ false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) { ++ return bit_pos; ++ } ++ } ++ } ++ } ++ return -1; ++} ++ ++/** ++ * @brief Allocate a g_id from the 1024-bit bitmap, left-shift by 8 bits ++ * @param bitmap The bitmap instance to allocate from ++ * @param gid The 
allocated g_id ++ * @return 0 on success, -1 if no g_id is available ++ */ ++static int hct_g_ids_alloc(struct hct_gid_bitmap *bitmap, unsigned long *gid) ++{ ++ unsigned long id = 0; ++ int bit_pos = -1; ++ int try_count = 0; ++ int max_try = 2; ++ int lock_ret = 0; ++ ++ if (!bitmap || !bitmap->bitmap || !gid) { ++ error_report("Invalid parameters\n"); ++ return -EINVAL; ++ } ++ ++ while (try_count < max_try) { ++ bit_pos = _hct_gid_bitmap_try_alloc(bitmap); ++ if (bit_pos >= 0) { ++ id = bit_pos + 1 - 1024; // g_id can't be 0 ++ *gid = id << HCT_QEMU_GIDS_SHIFT_BITS; ++ lock_ret = hct_g_ids_lock_state_lock(bitmap, *gid); ++ if (lock_ret == 0) { ++ return 0; ++ } else { ++ continue; ++ } ++ } else if (try_count == 0) { ++ // try to clean up orphaned g_ids ++ error_report("try to clean up orphaned g_ids\n"); ++ hct_g_ids_lock_state_walk(bitmap); ++ } ++ try_count++; ++ } ++ ++ error_report("No available g_id in bitmap or all locks busy after cleanup\n"); ++ return -EINVAL; ++} ++ ++/** ++ * @brief Free a g_id from the 1024-bit bitmap, left-shift by 8 bits ++ * @param bitmap The bitmap instance to free from ++ * @param gid The g_id to free ++ */ ++static void hct_g_ids_free(struct hct_gid_bitmap *bitmap, unsigned long gid) ++{ ++ unsigned long id = 0; ++ unsigned long bit_pos = 0; ++ off_t offset = 0; ++ struct flock lock; ++ ++ if (!bitmap || !bitmap->bitmap) { ++ error_report("Invalid bitmap parameters\n"); ++ return; ++ } ++ ++ if (gid == 0) { ++ error_report("Invalid g_id=0\n"); ++ return; ++ } ++ ++ // Extract original id from g_id ++ id = gid >> HCT_QEMU_GIDS_SHIFT_BITS; ++ if (id == 0 || id > HCT_QEMU_GIDS_BITMAP_MAX_BIT) { ++ error_report("Invalid g_id=0x%lx, extracted id=%lu\n", gid, id); ++ return; ++ } ++ ++ bit_pos = id - 1 + 1024; // Convert back to bit position ++ ++ offset = bit_pos * HCT_GIDS_PER_BLOCK; ++ lock.l_type = F_UNLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_start = offset; ++ lock.l_len = HCT_GIDS_PER_BLOCK; ++ if (bitmap->lock_fd >= 0) ++ 
fcntl(bitmap->lock_fd, F_SETLK, &lock); ++ ++ hct_clear_bit(bitmap->bitmap, bit_pos); ++} ++ ++/** ++ * @brief Lock a g_id ++ * @param bitmap The bitmap instance to lock ++ * @param gid The g_id to lock ++ * @return 0 on success, -1 if the g_id is not locked ++ */ ++static int hct_g_ids_lock_state_lock(struct hct_gid_bitmap *bitmap, unsigned long gid) ++{ ++ struct flock lock; ++ unsigned long id = 0; ++ off_t offset = 0; ++ int ret = 0; ++ ++ if (!bitmap || !bitmap->bitmap || bitmap->lock_fd < 0) { ++ error_report("Invalid bitmap\n"); ++ return -EINVAL; ++ } ++ ++ if (gid == 0) { ++ error_report("Invalid g_id=0\n"); ++ return -EINVAL; ++ } ++ ++ // Extract original id from g_id ++ id = gid >> HCT_QEMU_GIDS_SHIFT_BITS; ++ if (id == 0 || id > HCT_QEMU_GIDS_BITMAP_MAX_BIT) { ++ error_report("Invalid g_id=0x%lx, extracted id=%lu\n", gid, id); ++ return -EINVAL; ++ } ++ ++ // Calculate file offset for this g_id's lock region ++ offset = (id + 1024 - 1) * HCT_GIDS_PER_BLOCK; ++ ++ // Set up exclusive lock ++ lock.l_type = F_WRLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_start = offset; ++ lock.l_len = HCT_GIDS_PER_BLOCK; ++ ++ // Try to acquire lock (non-blocking) ++ if (fcntl(bitmap->lock_fd, F_SETLK, &lock) == 0) { ++ ret = 0; ++ } else { ++ error_report("Failed to lock g_id=0x%lx, offset%lx, errno=%d\n", gid, offset, errno); ++ ret = -EINVAL; ++ } ++ ++ return ret; ++} ++ ++/** ++ * @brief Walk the bitmap to detect orphaned g_ids ++ * @param bitmap The bitmap instance to walk ++ */ ++static void hct_g_ids_lock_state_walk(struct hct_gid_bitmap *bitmap) ++{ ++ struct flock lock; ++ unsigned long bit_pos = 0; ++ unsigned long gid = 0; ++ off_t offset = 0; ++ ++ if (!bitmap || !bitmap->bitmap || bitmap->lock_fd < 0) { ++ error_report("Invalid bitmap parameters\n"); ++ return; ++ } ++ ++ // Walk through all allocated bits in bitmap ++ for (bit_pos = HCT_QEMU_GIDS_BITMAP_MIN_BIT; bit_pos < HCT_QEMU_GIDS_BITMAP_MAX_BIT; bit_pos++) { ++ if (hct_get_bit(bitmap->bitmap, 
bit_pos)) { ++ gid = (bit_pos + 1) << HCT_QEMU_GIDS_SHIFT_BITS; ++ if (gid == *(unsigned long *)((unsigned long)(hct_data.pasid_memory) + HCT_PASID_MEM_GID_OFFSET)) { ++ continue; ++ } ++ if (gid >> HCT_QEMU_GIDS_SHIFT_BITS == 0) { ++ continue; ++ } ++ offset = bit_pos * HCT_GIDS_PER_BLOCK; ++ ++ // Try to acquire exclusive lock for this g_id ++ lock.l_type = F_WRLCK; ++ lock.l_whence = SEEK_SET; ++ lock.l_start = offset; ++ lock.l_len = HCT_GIDS_PER_BLOCK; ++ ++ if (fcntl(bitmap->lock_fd, F_GETLK, &lock) == -1) { ++ error_report("Failed to get lock file status.\n"); ++ return; ++ } ++ if (lock.l_type == F_UNLCK) { ++ info_report("Detected orphaned g_id=0x%lx, cleaning up\n", gid); ++ hct_clear_bit(bitmap->bitmap, bit_pos); ++ } ++ } ++ } ++} ++ ++static void hct_clear_bit(unsigned long *bitmap, int n) ++{ ++ __atomic_fetch_and(&bitmap[WORD_OFFSET(n)], ~(1UL << BIT_OFFSET(n)), __ATOMIC_RELEASE); ++} ++ ++static uint32_t hct_get_bit(unsigned long *bitmap, int n) ++{ ++ return ((bitmap[WORD_OFFSET(n)] & (0x1UL << BIT_OFFSET(n))) != 0); ++} ++ ++static int hct_create_user_shared_memory(const char *name, size_t size) ++{ ++ mode_t oldmod; ++ void *addr = NULL; ++ int shm_fd = -1; ++ int new_mem = 0; ++ ++ oldmod = umask(0); ++ shm_fd = shm_open(name, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0666); ++ umask(oldmod); ++ ++ if (shm_fd < 0) { ++ if (errno == EEXIST) { ++ error_report("Shared memory %s already exists, trying to open existing one\n", name); ++ return 0; ++ } else { ++ error_report("Failed to create/open shared memory %s, errno %d (%s)\n", name, errno, strerror(errno)); ++ return -1; ++ } ++ } else { ++ new_mem = 1; ++ } ++ ++ if (ftruncate(shm_fd, size) != 0) { ++ error_report("Failed to set shared memory size %zu, errno %d (%s)\n", size, errno, strerror(errno)); ++ close(shm_fd); ++ return -1; ++ } ++ ++ if (new_mem) { ++ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); ++ if (addr == MAP_FAILED) { ++ error_report("Failed to mmap shared 
memory %s, errno %d (%s)\n", name, errno, strerror(errno)); ++ close(shm_fd); ++ return -1; ++ } ++ memset(addr, 0, size); ++ munmap(addr, size); ++ } ++ ++ close(shm_fd); ++ return 0; ++} +-- +2.43.5 + diff --git a/1172-hw-vfio-hct-support-live-migration-function-for-virt.patch b/1172-hw-vfio-hct-support-live-migration-function-for-virt.patch new file mode 100644 index 0000000000000000000000000000000000000000..e960b64eec7c8cdfa6c39cc4c9b89f0c0b0c1f7b --- /dev/null +++ b/1172-hw-vfio-hct-support-live-migration-function-for-virt.patch @@ -0,0 +1,389 @@ +From 79da8273a9a293e7f732bcf8032ba7fc857c2736 Mon Sep 17 00:00:00 2001 +From: Yabin Li +Date: Tue, 14 Oct 2025 14:17:52 +0800 +Subject: [PATCH 3/5] hw/vfio/hct: support live migration function for virtual + machines. + +Change-Id: I3336ba7fe4582cccdfdcf448a18ebfc1bfa57b92 +--- + hw/vfio/hct.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 292 insertions(+) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 75250587b..0dbc5cb34 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -18,6 +18,8 @@ + #include + #include + #include ++#include ++#include + + #include "qemu/osdep.h" + #include "qemu/queue.h" +@@ -33,6 +35,10 @@ + #include "qemu/error-report.h" + #include "qapi/error.h" + #include "hw/qdev-properties.h" ++#include "hw/virtio/vhost-vsock.h" ++#include "migration/migration.h" ++#include "migration/vmstate.h" ++#include "migration/misc.h" + + // ======================== g_id API ==================== + +@@ -219,6 +225,7 @@ typedef struct { + #define TYPE_HCT_DEV "hct" + #define PCI_HCT_DEV(obj) OBJECT_CHECK(HCTDevState, (obj), TYPE_HCT_DEV) + #define HCT_MAX_PASID (1 << 8) ++#define HCT_MIGRATE_VERSION 1 + + #define PCI_VENDOR_ID_HYGON_CCP 0x1d94 + #define PCI_DEVICE_ID_HYGON_CCP 0x1468 +@@ -260,6 +267,20 @@ typedef struct { + #define PASID_OFFSET 40 + #define HCT_PASID_MEM_GID_OFFSET 1024 + ++/* for migration */ ++#define HCT_MIG_PROTOCOL_VER 1 ++#define HCT_MIG_MSG_MAGIC 0x76484354 
++#define HCT_VMADDR_CID_HOST 2 ++#define HCT_VSOCK_PORT 12345 ++#define HCT_MIG_STATE_ONLINE 0x00 ++#define HCT_MIG_STATE_RESTRICTED 0x01 ++#define HCT_MIG_STATE_STOPPED 0x02 ++#define HCT_MIGRATION_START 0x01 ++#define HCT_CHECK_VM_READINESS 0x02 ++#define HCT_MIGRATION_DONE 0x03 ++#define HCT_MIG_MSG_ACK 0x01 ++#define HCT_MIG_MSG_ERR 0x02 ++ + static volatile struct hct_data { + int init; + int hct_fd; +@@ -286,9 +307,15 @@ typedef struct HctDevState { + MemoryRegion mmio; + MemoryRegion shared; + MemoryRegion pasid; ++ NotifierWithReturn precopy_notifier; ++ QEMUTimer *migrate_load_timer; + uint64_t map_size[PCI_NUM_REGIONS]; ++ uint32_t guest_cid; ++ uint32_t migrate_support; ++ int client_fd; + void *maps[PCI_NUM_REGIONS]; + char *ccp_dev_path; ++ char *vsock_device; + int container_fd; /* vfio container fd */ + int group_fd; /* vfio group fd */ + int group_id; /* vfio group id */ +@@ -470,6 +497,7 @@ static void vfio_hct_exit(PCIDevice *dev) + static Property vfio_hct_properties[] = { + DEFINE_PROP_STRING("sysfsdev", HCTDevState, vdev.sysfsdev), + DEFINE_PROP_STRING("dev", HCTDevState, ccp_dev_path), ++ DEFINE_PROP_STRING("vsock-device", HCTDevState, vsock_device), + DEFINE_PROP_END_OF_LIST(), + }; + +@@ -856,6 +884,248 @@ static int hct_get_used_driver_walk(const char *path) + return ret; + } + ++static int hct_get_vsock_guest_cid(HCTDevState *state) ++{ ++ Object *dev = NULL; ++ gchar *path = NULL; ++ uint32_t cid; ++ ++ if (!state->vsock_device) { ++ dev = object_resolve_path_type("", TYPE_VHOST_VSOCK, NULL); ++ if (!dev) { ++ error_report("get Object for %s failed.", TYPE_VHOST_VSOCK); ++ return -1; ++ } ++ } else { ++ path = g_strdup_printf("/machine/peripheral/%s", state->vsock_device); ++ dev = object_resolve_path(path, NULL); ++ g_free(path); ++ if (!dev) { ++ error_report("get Object for /machine/peripheral/%s failed.", state->vsock_device); /* path is already freed here */ ++ return -1; ++ } ++ } ++ ++ cid = object_property_get_uint(dev, "guest-cid", NULL); ++ if (cid <= HCT_VMADDR_CID_HOST) { ++ error_report("cid
= %u, invalid.", cid); ++ return -1; ++ } ++ ++ state->guest_cid = cid; ++ return 0; ++} ++ ++static int hct_client_vsock_connect_op(HCTDevState *state) ++{ ++ struct sockaddr_vm host_addr; ++ int sock_fd; ++ ++ if (state->guest_cid <= HCT_VMADDR_CID_HOST) { ++ error_report("state->guest_cid = %u, invalid.", state->guest_cid); ++ return -1; ++ } ++ ++ sock_fd = socket(AF_VSOCK, SOCK_STREAM, 0); ++ if (sock_fd < 0) { ++ perror("socket creation failed"); ++ return -1; ++ } ++ ++ memset(&host_addr, 0, sizeof(host_addr)); ++ host_addr.svm_family = AF_VSOCK; ++ host_addr.svm_cid = state->guest_cid; ++ host_addr.svm_port = HCT_VSOCK_PORT; ++ ++ if (connect(sock_fd, (struct sockaddr*)&host_addr, sizeof(host_addr)) < 0) { ++ perror("connect failed"); ++ close(sock_fd); ++ return -EAGAIN; ++ } ++ ++ state->client_fd = sock_fd; ++ return 0; ++} ++ ++static int hct_client_vsock_send_msg(int sock_fd, char *buf, size_t len) ++{ ++ struct msghdr msg; ++ struct iovec iov; ++ ++ memset(&msg, 0, sizeof(msg)); ++ iov.iov_base = buf; ++ iov.iov_len = len; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ return sendmsg(sock_fd, &msg, 0); ++} ++ ++static int hct_client_vsock_recv_msg(int sock_fd, char *buf, size_t len) ++{ ++ struct msghdr msg; ++ struct iovec iov; ++ ++ memset(&msg, 0, sizeof(msg)); ++ iov.iov_base = buf; ++ iov.iov_len = len; ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ return recvmsg(sock_fd, &msg, 0); ++} ++ ++static int hct_client_send_msg(HCTDevState *state, char *buf, size_t len, int mloop) ++{ ++ int loops = 0; ++ int ret = -1; ++ ++ while ((ret = hct_client_vsock_connect_op(state)) != 0 && ret == -EAGAIN) { ++ if (!mloop) { ++ return -EAGAIN; ++ } ++ if (++loops > mloop) { ++ error_report("loops = %d, connect failed.", loops); ++ return -1; ++ } ++ usleep(20 * 1000); ++ } ++ ++ if (hct_client_vsock_send_msg(state->client_fd, (char *)buf, len) < 0) { ++ error_report("hct_client_vsock_send_msg failed."); ++ goto exit; ++ } ++ ++ memset((void *)buf, 0, len); 
++ if (hct_client_vsock_recv_msg(state->client_fd, (char *)buf, len) < 0) { ++ error_report("hct_client_vsock_recv_msg failed."); ++ goto exit; ++ } ++ ++ ret = 0; ++ ++exit: ++ close(state->client_fd); ++ return ret; ++} ++ ++static int hct_migrate_precopy_notifier(NotifierWithReturn *notifier, void *data) ++{ ++ HCTDevState *state = container_of(notifier, HCTDevState, precopy_notifier); ++ MigrationState *ms = migrate_get_current(); ++ PrecopyNotifyData *pnd = data; ++ int msg[16]; ++ int MAX_CONNECT_LOOPS = 10; ++ int MAX_CHECK_LOOPS = 20; ++ int loops = 0; ++ int ret = -1; ++ ++ if (pnd->reason != PRECOPY_NOTIFY_SETUP) ++ return 0; ++ ++ /* [0]:magic [1]:version [2]:op [3]:sync_state */ ++ msg[0] = HCT_MIG_MSG_MAGIC; ++ msg[1] = HCT_MIG_PROTOCOL_VER; ++ msg[2] = HCT_MIGRATION_START; ++ msg[3] = 0; ++ ret = hct_client_send_msg(state, (char *)msg, sizeof(msg), MAX_CONNECT_LOOPS); ++ if (ret != 0 || msg[0] != HCT_MIG_MSG_MAGIC) { ++ /* Fall back to live-migrating it like a regular virtual machine. */ ++ error_report("ret:%d msg[0]:0x%x, please install a newer hct.ko" ++ " for the virtual machine.", ret, msg[0]); ++ state->migrate_support = 0; ++ return 0; ++ } else if (msg[3] != HCT_MIG_MSG_ACK) { ++ /* We believe that the virtual machine is not ready, ++ * so terminate the live migration.
++ */ ++ error_setg(pnd->errp, "%s[%u] msg[3]:0x%02x, invalid.\n", ++ __func__, __LINE__, msg[3]); ++ ms->state = MIGRATION_STATUS_CANCELLED; ++ return -1; ++ } ++ ++ while (++loops <= MAX_CHECK_LOOPS) { ++ msg[0] = HCT_MIG_MSG_MAGIC; ++ msg[1] = HCT_MIG_PROTOCOL_VER; ++ msg[2] = HCT_CHECK_VM_READINESS; ++ msg[3] = 0; ++ ret = hct_client_send_msg(state, (char *)msg, sizeof(msg), MAX_CONNECT_LOOPS); ++ if (ret != 0 || msg[0] != HCT_MIG_MSG_MAGIC) { ++ error_setg(pnd->errp, "ret:%d msg[0]:0x%x, invalid.", ret, msg[0]); ++ ms->state = MIGRATION_STATUS_CANCELLED; ++ return -1; ++ } else if (msg[3] == HCT_MIG_STATE_STOPPED) { ++ break; ++ } ++ sleep(1); ++ } ++ if (loops > MAX_CHECK_LOOPS) { ++ error_setg(pnd->errp, "%s[%u] loops:%d > MAX_CHECK_LOOPS:%d, will cancel.\n", ++ __func__, __LINE__, loops, MAX_CHECK_LOOPS); ++ ms->state = MIGRATION_STATUS_CANCELLED; ++ return -1; ++ } ++ ++ info_report("%s: recieved HCT_MIG_MSG_ACK.", __func__); ++ return 0; ++} ++ ++static void hct_client_connect_timer_cb(void *opaque) ++{ ++ HCTDevState *state = opaque; ++ int msg[16]; ++ int MAX_LOOP_TIMES = 10; ++ static int loops = 0; ++ int ret; ++ ++ /* [0]:magic [1]:version [2]:op [3]:sync_state */ ++ msg[0] = HCT_MIG_MSG_MAGIC; ++ msg[1] = HCT_MIG_PROTOCOL_VER; ++ msg[2] = HCT_MIGRATION_DONE; ++ msg[3] = 0; ++ ret = hct_client_send_msg(state, (char *)msg, sizeof(msg), 0); ++ if (ret == -EAGAIN) { ++ if (++loops > MAX_LOOP_TIMES) { ++ error_report("%s: loops = %d, connect failed.", __func__, loops); ++ goto exit; ++ } ++ timer_mod(state->migrate_load_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ++ NANOSECONDS_PER_SECOND * 2); /* 2s */ ++ return; ++ } ++ ++ if (ret != 0 || msg[0] != HCT_MIG_MSG_MAGIC || msg[3] != HCT_MIG_MSG_ACK) { ++ error_report("ret:%d msg[0]:0x%x msg[3]:0x%x, invalid.\n", ret, msg[0], msg[3]); ++ goto exit; ++ } ++ info_report("%s: recieved HCT_MIG_MSG_ACK.", __func__); ++ ++exit: ++ timer_free(state->migrate_load_timer); ++ state->migrate_load_timer = NULL; ++ 
state->client_fd = -1; /* socket was already closed by hct_client_send_msg() */ ++ loops = 0; ++} ++ ++static int hct_dev_post_load(void *opaque, int version_id) ++{ ++ HCTDevState *state = opaque; ++ ++ if (!state->migrate_support) ++ return 0; ++ ++ /* When there are multiple ccp devices, each device will ++ * execute the post_load function once. ++ * We only hope that the first device can set the timer. ++ */ ++ state->migrate_support = 0; ++ state->migrate_load_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ++ hct_client_connect_timer_cb, state); ++ timer_mod(state->migrate_load_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ++ NANOSECONDS_PER_SECOND * 5); /* 5s */ ++ ++ return 0; ++} ++ + static void hct_data_uninit(HCTDevState *state) + { + if (hct_data.hct_fd) { +@@ -893,6 +1163,7 @@ static void hct_data_uninit(HCTDevState *state) + } + + memory_listener_unregister(&hct_memory_listener); ++ precopy_remove_notifier(&state->precopy_notifier); + + hct_data.init = 0; + hct_data.driver = HCT_CCP_DRV_MOD_UNINIT; +@@ -1012,6 +1283,15 @@ static int hct_data_init(HCTDevState *state) + if (ret < 0) + goto unmap_pasid_memory_exit; + ++ ret = hct_get_vsock_guest_cid(state); ++ if (ret < 0) ++ error_report("get the guest_cid of vsock device fail."); ++ ++ state->precopy_notifier.notify = hct_migrate_precopy_notifier; ++ precopy_add_notifier(&state->precopy_notifier); ++ state->migrate_load_timer = NULL; ++ state->migrate_support = 1; ++ + /* perform DMA_MAP and DMA_UNMAP operations on all memories of the virtual machine.
*/ + memory_listener_register(&hct_memory_listener, &address_space_memory); + +@@ -1141,12 +1421,24 @@ out: + return; + } + ++static const VMStateDescription vfio_hct_vmstate = { ++ .name = "vfio-hct-dev", ++ .version_id = HCT_MIGRATE_VERSION, ++ .minimum_version_id = HCT_MIGRATE_VERSION, ++ .post_load = hct_dev_post_load, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT32(migrate_support, HCTDevState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static void hct_dev_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass); + + dc->desc = "HCT Device"; ++ dc->vmsd = &vfio_hct_vmstate; + device_class_set_props(dc, vfio_hct_properties); + + pdc->realize = vfio_hct_realize; +-- +2.43.5 + diff --git a/1173-hw-vfio-hct-fix-virtual-machine-paused-due-to-obtain.patch b/1173-hw-vfio-hct-fix-virtual-machine-paused-due-to-obtain.patch new file mode 100644 index 0000000000000000000000000000000000000000..8744359e00f8c5c7a5dacb14cf77493a40513b85 --- /dev/null +++ b/1173-hw-vfio-hct-fix-virtual-machine-paused-due-to-obtain.patch @@ -0,0 +1,165 @@ +From e2e0049e25fabc128e09e0978db38a1553e7026b Mon Sep 17 00:00:00 2001 +From: Yabin Li +Date: Wed, 15 Oct 2025 21:37:34 +0800 +Subject: [PATCH 4/5] hw/vfio/hct: fix virtual machine paused due to obtain VQ + failed. 
+ +Change-Id: I1f01db41fcfeba016e69c223bc426cba46464ecc +--- + hw/vfio/hct.c | 111 +++++++++++++++----------------------------------- + 1 file changed, 33 insertions(+), 78 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 0dbc5cb34..146439a71 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -380,9 +380,6 @@ static void hct_client_cleanup(hct_client_info_t *client_info); + /* @brief hct client send cmd function */ + static int hct_client_send_cmd(const char *socket_path, hct_client_info_t *client_info, enum hct_daemon_req_cmd cmd, void *req_data); + +-/* @brief hct create user shared memory function */ +-static int hct_create_user_shared_memory(const char *name, size_t size); +- + static int hct_get_sysfs_value(const char *path, int *val) + { + FILE *fp = NULL; +@@ -749,19 +746,42 @@ static int hct_api_version_check(void) + + static int hct_shared_memory_init(void) + { +- int ret = 0; ++ const char *name = HCT_GLOBAL_SHARE_SHM_NAME; ++ size_t size = HCT_SHARED_MEMORY_SIZE; ++ void *vaddr = NULL; ++ int shm_fd = -1; ++ mode_t oldmod; ++ ++ oldmod = umask(0); ++ shm_fd = shm_open(name, O_RDWR | O_CREAT | O_EXCL | O_CLOEXEC, 0666); ++ umask(oldmod); ++ if (shm_fd < 0 && errno == EEXIST) ++ shm_fd = shm_open(name, O_RDWR | O_CLOEXEC, 0666); ++ if (shm_fd < 0) { ++ error_report("Failed to open file %s, errno: %d.\n", name, errno); ++ return -1; ++ } ++ ++ if (ftruncate(shm_fd, size) != 0) { ++ error_report("Failed to ftruncate file %s, errno: %d\n", name, errno); ++ close(shm_fd); ++ return -1; ++ } + +- hct_data.hct_shared_memory = mmap(NULL, hct_data.hct_shared_size, +- PROT_READ | PROT_WRITE, MAP_SHARED, +- hct_data.hct_shm_fd, 0); +- if (hct_data.hct_shared_memory == MAP_FAILED) { +- ret = -errno; ++ vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); ++ if (vaddr == MAP_FAILED) { + error_report("map hct shared memory fail\n"); +- goto out; ++ close(shm_fd); ++ return -ENOMEM; + } + +-out: +- return ret; ++ if (flock(shm_fd, 
LOCK_EX | LOCK_NB) == 0) ++ memset(vaddr, 0, size); ++ ++ hct_data.hct_shm_fd = shm_fd; ++ hct_data.hct_shared_size = size; ++ hct_data.hct_shared_memory = vaddr; ++ return flock(shm_fd, LOCK_SH); + } + + static void hct_listener_region_add(MemoryListener *listener, +@@ -1141,7 +1161,7 @@ static void hct_data_uninit(HCTDevState *state) + } + + if (hct_data.hct_shm_fd) { +- qemu_close(hct_data.hct_shm_fd); ++ close(hct_data.hct_shm_fd); + hct_data.hct_shm_fd = 0; + } + if (hct_data.pasid) { +@@ -1232,27 +1252,6 @@ static int hct_data_init(HCTDevState *state) + } + } + if (hct_data.init == 0) { +- hct_shr_name = HCT_GLOBAL_SHARE_SHM_PATH; +- hct_data.hct_shm_fd = qemu_open_old(hct_shr_name, O_RDWR); +- if (hct_data.hct_shm_fd < 0) { +- if (errno == 2) { +- ret = hct_create_user_shared_memory(HCT_GLOBAL_SHARE_SHM_NAME, HCT_SHARED_MEMORY_SIZE); +- if (!ret) { +- hct_data.hct_shm_fd = qemu_open_old(hct_shr_name, O_RDWR); +- if (hct_data.hct_shm_fd < 0) { +- ret = -errno; +- } +- } +- } else { +- ret = -errno; +- } +- if (ret < 0) { +- error_report("fail to open %s, errno %d.", hct_shr_name, errno); +- goto out; +- } +- } +- hct_data.hct_shared_size = HCT_SHARED_MEMORY_SIZE; +- + /* assign a page to the virtual BAR3 of each CCP. 
*/ + ret = hct_shared_memory_init(); + if (ret) +@@ -2376,47 +2375,3 @@ static uint32_t hct_get_bit(unsigned long *bitmap, int n) + { + return ((bitmap[WORD_OFFSET(n)] & (0x1UL << BIT_OFFSET(n))) != 0); + } +- +-static int hct_create_user_shared_memory(const char *name, size_t size) +-{ +- mode_t oldmod; +- void *addr = NULL; +- int shm_fd = -1; +- int new_mem = 0; +- +- oldmod = umask(0); +- shm_fd = shm_open(name, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, 0666); +- umask(oldmod); +- +- if (shm_fd < 0) { +- if (errno == EEXIST) { +- error_report("Shared memory %s already exists, trying to open existing one\n", name); +- return 0; +- } else { +- error_report("Failed to create/open shared memory %s, errno %d (%s)\n", name, errno, strerror(errno)); +- return -1; +- } +- } else { +- new_mem = 1; +- } +- +- if (ftruncate(shm_fd, size) != 0) { +- error_report("Failed to set shared memory size %zu, errno %d (%s)\n", size, errno, strerror(errno)); +- close(shm_fd); +- return -1; +- } +- +- if (new_mem) { +- addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); +- if (addr == MAP_FAILED) { +- error_report("Failed to mmap shared memory %s, errno %d (%s)\n", name, errno, strerror(errno)); +- close(shm_fd); +- return -1; +- } +- memset(addr, 0, size); +- munmap(addr, size); +- } +- +- close(shm_fd); +- return 0; +-} +-- +2.43.5 + diff --git a/1174-hw-vfio-hct-support-abort-migration-when-hct-excepti.patch b/1174-hw-vfio-hct-support-abort-migration-when-hct-excepti.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c51900ad6bcaf27c70c8a7586c5554dde75088e --- /dev/null +++ b/1174-hw-vfio-hct-support-abort-migration-when-hct-excepti.patch @@ -0,0 +1,98 @@ +From 235ea33d82729ec5a8bc1101a7d5a676c46bde80 Mon Sep 17 00:00:00 2001 +From: Yabin Li +Date: Fri, 31 Oct 2025 14:10:58 +0800 +Subject: [PATCH 5/5] hw/vfio/hct: support abort migration when hct exception + +Change-Id: Ic97b2bbfa9853c34def774aa6632203034c07e9e +--- + hw/vfio/hct.c | 47 
++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 36 insertions(+), 11 deletions(-) + +diff --git a/hw/vfio/hct.c b/hw/vfio/hct.c +index 146439a71..3cb549a0a 100644 +--- a/hw/vfio/hct.c ++++ b/hw/vfio/hct.c +@@ -320,6 +320,7 @@ typedef struct HctDevState { + int group_fd; /* vfio group fd */ + int group_id; /* vfio group id */ + int lock_fd; /* vccp flock fd for this device only */ ++ bool migrate_abort_err; + } HCTDevState; + + struct hct_dev_ctrl { +@@ -495,6 +496,7 @@ static Property vfio_hct_properties[] = { + DEFINE_PROP_STRING("sysfsdev", HCTDevState, vdev.sysfsdev), + DEFINE_PROP_STRING("dev", HCTDevState, ccp_dev_path), + DEFINE_PROP_STRING("vsock-device", HCTDevState, vsock_device), ++ DEFINE_PROP_BOOL("migrate-abort-on-error", HCTDevState, migrate_abort_err, false), + DEFINE_PROP_END_OF_LIST(), + }; + +@@ -1057,10 +1059,17 @@ static int hct_migrate_precopy_notifier(NotifierWithReturn *notifier, void *data + /* We believe that the virtual machine is not ready, + * so terminate the live migration. 
+ */ +- error_setg(pnd->errp, "%s[%u] msg[3]:0x%02x, invalid.\n", +- __func__, __LINE__, msg[3]); +- ms->state = MIGRATION_STATUS_CANCELLED; +- return -1; ++ if (state->migrate_abort_err) { ++ error_setg(pnd->errp, "%s[%u] msg[3]:0x%02x invalid, notifier fail.\n", ++ __func__, __LINE__, msg[3]); ++ ms->state = MIGRATION_STATUS_CANCELLED; ++ return -1; ++ } else { ++ error_report("%s[%u] msg[3]:0x%02x invalid, notifier fail.\n", ++ __func__, __LINE__, msg[3]); ++ state->migrate_support = 0; ++ return 0; ++ } + } + + while (++loops <= MAX_CHECK_LOOPS) { +@@ -1070,19 +1079,35 @@ static int hct_migrate_precopy_notifier(NotifierWithReturn *notifier, void *data + msg[3] = 0; + ret = hct_client_send_msg(state, (char *)msg, sizeof(msg), MAX_CONNECT_LOOPS); + if (ret != 0 || msg[0] != HCT_MIG_MSG_MAGIC) { +- error_setg(pnd->errp, "ret:%d msg[0]:0x%x, invalid.", ret, msg[0]); +- ms->state = MIGRATION_STATUS_CANCELLED; +- return -1; ++ if (state->migrate_abort_err) { ++ error_setg(pnd->errp, "%s[%u] ret:%d msg[0]:0x%x invalid, notifier fail.\n", ++ __func__, __LINE__, ret, msg[0]); ++ ms->state = MIGRATION_STATUS_CANCELLED; ++ return -1; ++ } else { ++ error_report("%s[%u] ret:%d msg[0]:0x%x invalid, notifier fail.\n", ++ __func__, __LINE__, ret, msg[0]); ++ state->migrate_support = 0; ++ return 0; ++ } + } else if (msg[3] == HCT_MIG_STATE_STOPPED) { + break; + } + sleep(1); + } + if (loops > MAX_CHECK_LOOPS) { +- error_setg(pnd->errp, "%s[%u] loops:%d > MAX_CHECK_LOOPS:%d, will cancel.\n", +- __func__, __LINE__, loops, MAX_CHECK_LOOPS); +- ms->state = MIGRATION_STATUS_CANCELLED; +- return -1; ++ if (state->migrate_abort_err) { ++ error_setg(pnd->errp, "%s[%u] loops:%d > MAX_CHECK_LOOPS:%d," ++ " the live migration will be canceled.\n", ++ __func__, __LINE__, loops, MAX_CHECK_LOOPS); ++ ms->state = MIGRATION_STATUS_CANCELLED; ++ return -1; ++ } else { ++ error_report("%s[%u] loops:%d > MAX_CHECK_LOOPS:%d, hct live migration fail.\n", ++ __func__, __LINE__, loops, 
MAX_CHECK_LOOPS); ++ state->migrate_support = 0; ++ return 0; ++ } + } + + info_report("%s: recieved HCT_MIG_MSG_ACK.", __func__); +-- +2.43.5 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b011ee184230080f9f95eb3e5919ebb2d93c3d45..8d5e0f9bf036b0b2480c59c326cd6286991da75f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.8 +%define anolis_release .0.9 %global SLOF_gittagdate 20191022 %global SLOF_gittagcommit 899d9883 @@ -1090,7 +1090,14 @@ Patch1167: 1167-docs-Add-GNR-SRF-and-CWF-CPU-models.patch Patch1168: 1168-target-i386-add-sha512-sm3-sm4-feature-bits.patch # Support Hygon TKM (Trusted Key Management) Guest live migration -Patch1169: hw-misc-psp-support-live-migrate-for-vpsp-device.patch +Patch1169: 1169-hw-misc-psp-support-live-migrate-for-vpsp-device.patch + +# Support Hygon HCT vCCP Guest live migration +Patch1170: 1170-hw-vfio-hct-support-start-with-ccp.ko-driver.patch +Patch1171: 1171-hw-vfio-hct-support-vfio-pci-multiple-processes.patch +Patch1172: 1172-hw-vfio-hct-support-live-migration-function-for-virt.patch +Patch1173: 1173-hw-vfio-hct-fix-virtual-machine-paused-due-to-obtain.patch +Patch1174: 1174-hw-vfio-hct-support-abort-migration-when-hct-excepti.patch BuildRequires: wget BuildRequires: rpm-build @@ -1600,7 +1607,7 @@ pushd %{qemu_kvm_build} --localstatedir="%{_localstatedir}" \ --docdir="%{_docdir}" \ --libexecdir="%{_libexecdir}" \ - --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now -lrt" \ --extra-cflags="%{optflags}" \ --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ @@ -2329,9 +2336,17 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %endif %changelog -* Thu Oct 09 2025 Mengbiao Xiong +* Tue Nov 18 2025 yangdepei - 6.2.0-53.0.8.8 +- Support Hygon HCT vCCP Guest live migration +- 1170-hw-vfio-hct-support-start-with-ccp.ko-driver.patch +- 
1171-hw-vfio-hct-support-vfio-pci-multiple-processes.patch +- 1172-hw-vfio-hct-support-live-migration-function-for-virt.patch +- 1173-hw-vfio-hct-fix-virtual-machine-paused-due-to-obtain.patch +- 1174-hw-vfio-hct-support-abort-migration-when-hct-excepti.patch + +* Thu Oct 09 2025 Mengbiao Xiong - 6.2.0-53.0.8.7 - Support Hygon TKM (Trusted Key Management) Guest live migration -- hw-misc-psp-support-live-migrate-for-vpsp-device.patch +- 1169-hw-misc-psp-support-live-migrate-for-vpsp-device.patch * Tue Sep 16 2025 Quanxian Wang - 6.2.0-53.0.8.6 - Clearwater Forrest(CWF) Support