# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1192097770 -32400 # Node ID 8321f8577a60f64b4999328183be49c5fa2c7c69 # Parent 6f9435bb6a195a52cb43799b82cc24cdc7a298f3 libxc: vti domain save/restore support PATCHNAME: libxc_vti_domain_save_restore Signed-off-by: Isaku Yamahata diff -r 6f9435bb6a19 -r 8321f8577a60 tools/libxc/ia64/xc_ia64_linux_restore.c --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Thu Oct 11 19:10:56 2007 +0900 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Thu Oct 11 19:16:10 2007 +0900 @@ -8,6 +8,7 @@ * * Copyright (c) 2007 Isaku Yamahata * Use foreign p2m exposure. + * VTi domain support */ #include @@ -17,6 +18,7 @@ #include "xc_ia64_save_restore.h" #include "xc_ia64.h" #include "xc_efi.h" +#include "xen/hvm/params.h" #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) @@ -75,250 +77,116 @@ read_page(int xc_handle, int io_fd, uint return 0; } -int -xc_domain_restore(int xc_handle, int io_fd, uint32_t dom, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn, - unsigned int hvm, unsigned int pae) -{ - DECLARE_DOMCTL; - int rc = 1; +/* + * Get the list of PFNs that are not in the psuedo-phys map. + * Although we allocate pages on demand, balloon driver may + * decreased simaltenously. So we have to free the freed + * pages here. + */ +static int +xc_ia64_recv_unallocated_list(int xc_handle, int io_fd, uint32_t dom, + struct xen_ia64_p2m_table *p2m_table) +{ + int rc = -1; unsigned int i; - unsigned long gmfn; - unsigned long ver; - - /* The new domain's shared-info frame number. */ - unsigned long shared_info_frame; - unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ - shared_info_t *shared_info = (shared_info_t *)shared_info_page; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; - - /* A temporary mapping of the guest's start_info page. 
*/ - start_info_t *start_info; - - struct xen_ia64_p2m_table p2m_table; - xc_ia64_p2m_init(&p2m_table); - - if (hvm) { - ERROR("HVM Restore is unsupported"); - goto out; - } - - /* For info only */ - nr_pfns = 0; - - if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) - { - ERROR("read: p2m_size"); - goto out; - } - DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size); - - if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { - ERROR("Error when reading version"); - goto out; - } - if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) { - ERROR("version of save doesn't match"); - goto out; - } - - if (lock_pages(&ctxt, sizeof(ctxt))) { - /* needed for build domctl, but might as well do early */ - ERROR("Unable to lock_pages ctxt"); - return 1; - } - - if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { - ERROR("read: domain setup"); - goto out; - } - - /* Build firmware (will be overwritten). */ - domctl.domain = (domid_t)dom; - domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query; - domctl.u.arch_setup.bp = 0; /* indicate domain restore */ + unsigned int count; + unsigned long *pfntab = NULL; + unsigned int nr_frees; + + if (!read_exact(io_fd, &count, sizeof(count))) { + ERROR("Error when reading pfn count"); + goto out; + } + + pfntab = malloc(sizeof(unsigned long) * count); + if (pfntab == NULL) { + ERROR("Out of memory"); + goto out; + } + + if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { + ERROR("Error when reading pfntab"); + goto out; + } + + nr_frees = 0; + for (i = 0; i < count; i++) { + if (xc_ia64_p2m_allocated(p2m_table, pfntab[i])) { + pfntab[nr_frees] = pfntab[i]; + nr_frees++; + } + } + if (nr_frees > 0) { + if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees, + 0, pfntab) < 0) { + PERROR("Could not decrease reservation"); + goto out; + } + else + DPRINTF("Decreased reservation by %d / %d pages\n", + nr_frees, count); + } + + rc = 0; - domctl.cmd = 
XEN_DOMCTL_arch_setup; - if (xc_domctl(xc_handle, &domctl)) - goto out; - - /* Get the domain's shared-info frame. */ - domctl.cmd = XEN_DOMCTL_getdomaininfo; - domctl.domain = (domid_t)dom; - if (xc_domctl(xc_handle, &domctl) < 0) { - ERROR("Could not get information on new domain"); - goto out; - } - shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; - - if (ver == XC_IA64_SR_FORMAT_VER_TWO) { - unsigned int memmap_info_num_pages; - unsigned long memmap_size; - xen_ia64_memmap_info_t *memmap_info; - - if (!read_exact(io_fd, &memmap_info_num_pages, - sizeof(memmap_info_num_pages))) { - ERROR("read: memmap_info_num_pages"); - goto out; - } - memmap_size = memmap_info_num_pages * PAGE_SIZE; - memmap_info = malloc(memmap_size); - if (memmap_info == NULL) { - ERROR("Could not allocate memory for memmap_info"); - goto out; - } - if (!read_exact(io_fd, memmap_info, memmap_size)) { - ERROR("read: memmap_info"); - goto out; - } - if (xc_ia64_p2m_map(&p2m_table, xc_handle, - dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) { - ERROR("p2m mapping"); - goto out; - } - free(memmap_info); - } else if (ver == XC_IA64_SR_FORMAT_VER_ONE) { - xen_ia64_memmap_info_t *memmap_info; - efi_memory_desc_t *memdesc; - uint64_t buffer[(sizeof(*memmap_info) + sizeof(*memdesc) + - sizeof(uint64_t) - 1) / sizeof(uint64_t)]; - - memset(buffer, 0, sizeof(buffer)); - memmap_info = (xen_ia64_memmap_info_t *)buffer; - memdesc = (efi_memory_desc_t*)&memmap_info->memdesc[0]; - memmap_info->efi_memmap_size = sizeof(*memmap_info) + sizeof(*memdesc); - memmap_info->efi_memdesc_size = sizeof(*memdesc); - memmap_info->efi_memdesc_version = EFI_MEMORY_DESCRIPTOR_VERSION; - - memdesc->type = EFI_MEMORY_DESCRIPTOR_VERSION; - memdesc->phys_addr = 0; - memdesc->virt_addr = 0; - memdesc->num_pages = nr_pfns << (PAGE_SHIFT - EFI_PAGE_SHIFT); - memdesc->attribute = EFI_MEMORY_WB; - - if (xc_ia64_p2m_map(&p2m_table, xc_handle, - dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) { - ERROR("p2m mapping"); - 
goto out; - } - } else { - ERROR("unknown version"); - goto out; - } - - DPRINTF("Reloading memory pages: 0%%\n"); - - while (1) { - if (!read_exact(io_fd, &gmfn, sizeof(unsigned long))) { - ERROR("Error when reading batch size"); - goto out; - } - if (gmfn == INVALID_MFN) - break; - - if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) { - ERROR("can not populate page 0x%lx", gmfn); - goto out; - } - if (read_page(xc_handle, io_fd, dom, gmfn) < 0) - goto out; - } - - DPRINTF("Received all pages\n"); - - /* - * Get the list of PFNs that are not in the psuedo-phys map. - * Although we allocate pages on demand, balloon driver may - * decreased simaltenously. So we have to free the freed - * pages here. - */ - { - unsigned int count; - unsigned long *pfntab; - unsigned int nr_frees; - - if (!read_exact(io_fd, &count, sizeof(count))) { - ERROR("Error when reading pfn count"); - goto out; - } - - pfntab = malloc(sizeof(unsigned long) * count); - if (!pfntab) { - ERROR("Out of memory"); - goto out; - } - - if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { - ERROR("Error when reading pfntab"); - free(pfntab); - goto out; - } - - nr_frees = 0; - for (i = 0; i < count; i++) { - if (xc_ia64_p2m_allocated(&p2m_table, pfntab[i])) { - pfntab[nr_frees] = pfntab[i]; - nr_frees++; - } - } - if (nr_frees > 0) { - if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees, - 0, pfntab) < 0) { - ERROR("Could not decrease reservation : %d", rc); - free(pfntab); - goto out; - } - else - DPRINTF("Decreased reservation by %d / %d pages\n", - nr_frees, count); - } + out: + if (pfntab != NULL) free(pfntab); - } - - if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + return rc; +} + +static int +xc_ia64_recv_vcpu_context(int xc_handle, int io_fd, uint32_t dom, + uint32_t vcpu, vcpu_guest_context_t *ctxt) +{ + if (!read_exact(io_fd, ctxt, sizeof(*ctxt))) { ERROR("Error when reading ctxt"); - goto out; - } - - fprintf(stderr, "ip=%016lx, b0=%016lx\n", 
ctxt.regs.ip, ctxt.regs.b[0]); + return -1; + } + + fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]); /* Initialize and set registers. */ - ctxt.flags = VGCF_EXTRA_REGS; - domctl.cmd = XEN_DOMCTL_setvcpucontext; - domctl.domain = (domid_t)dom; - domctl.u.vcpucontext.vcpu = 0; - set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt); - if (xc_domctl(xc_handle, &domctl) != 0) { + ctxt->flags = VGCF_EXTRA_REGS; + if (xc_vcpu_setcontext(xc_handle, dom, vcpu, ctxt) != 0) { ERROR("Couldn't set vcpu context"); - goto out; + return -1; } /* Just a check. */ - if (xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) { + ctxt->flags = 0; + if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) { ERROR("Could not get vcpu context"); - goto out; - } - - /* Then get privreg page. */ - if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) { - ERROR("Could not read vcpu privregs"); - goto out; - } - + return -1; + } + + return 0; +} + +/* Read shared info. */ +static int +xc_ia64_recv_shared_info(int xc_handle, int io_fd, uint32_t dom, + unsigned long shared_info_frame, + unsigned long *start_info_pfn) +{ + unsigned int i; + + /* The new domain's shared-info frame. */ + shared_info_t *shared_info; + /* Read shared info. 
*/ shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ|PROT_WRITE, shared_info_frame); + PROT_READ|PROT_WRITE, + shared_info_frame); if (shared_info == NULL) { - ERROR("cannot map page"); - goto out; - } + ERROR("cannot map page"); + return -1; + } + if (!read_exact(io_fd, shared_info, PAGE_SIZE)) { - ERROR("Error when reading shared_info page"); - munmap(shared_info, PAGE_SIZE); - goto out; + ERROR("Error when reading shared_info page"); + munmap(shared_info, PAGE_SIZE); + return -1; } /* clear any pending events and the selector */ @@ -327,12 +195,53 @@ xc_domain_restore(int xc_handle, int io_ for (i = 0; i < MAX_VIRT_CPUS; i++) shared_info->vcpu_info[i].evtchn_pending_sel = 0; - gmfn = shared_info->arch.start_info_pfn; + if (start_info_pfn != NULL) + *start_info_pfn = shared_info->arch.start_info_pfn; munmap (shared_info, PAGE_SIZE); + return 0; +} + +static int +xc_ia64_pv_recv_context(int xc_handle, int io_fd, uint32_t dom, + unsigned long shared_info_frame, + struct xen_ia64_p2m_table *p2m_table, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + int rc = -1; + unsigned long gmfn; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + /* A temporary mapping of the guest's start_info page. */ + start_info_t *start_info; + + if (lock_pages(&ctxt, sizeof(ctxt))) { + /* needed for build domctl, but might as well do early */ + ERROR("Unable to lock_pages ctxt"); + return -1; + } + + if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt)) + goto out; + + /* Then get privreg page. */ + if (read_page(xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) { + ERROR("Could not read vcpu privregs"); + goto out; + } + + /* Read shared info. */ + if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom, + shared_info_frame, &gmfn)) + goto out; + /* Uncanonicalise the suspend-record frame number and poke resume rec. 
*/ - if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) { + if (populate_page_if_necessary(xc_handle, dom, gmfn, p2m_table)) { ERROR("cannot populate page 0x%lx", gmfn); goto out; } @@ -350,6 +259,323 @@ xc_domain_restore(int xc_handle, int io_ *console_mfn = start_info->console.domU.mfn; start_info->console.domU.evtchn = console_evtchn; munmap(start_info, PAGE_SIZE); + + rc = 0; + + out: + unlock_pages(&ctxt, sizeof(ctxt)); + return rc; +} + +static int +xc_ia64_hvm_recv_context(int xc_handle, int io_fd, uint32_t dom, + unsigned long shared_info_frame, + struct xen_ia64_p2m_table *p2m_table, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, + unsigned long *console_mfn) +{ + int rc = -1; + xc_dominfo_t info; + unsigned int i; + + /* cpu */ + uint64_t max_virt_cpus; + unsigned long vcpumap_size; + uint64_t *vcpumap = NULL; + + /* HVM: magic frames for ioreqs and xenstore comms */ + const int hvm_params[] = { + HVM_PARAM_IOREQ_PFN, + HVM_PARAM_BUFIOREQ_PFN, + HVM_PARAM_STORE_PFN, + }; + const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]); + /* ioreq_pfn, bufioreq_pfn, store_pfn */ + uint64_t magic_pfns[NR_PARAMS]; + + /* HVM: a buffer for holding HVM contxt */ + uint64_t rec_size = 0; + uint8_t *hvm_buf = NULL; + + /* Read shared info. 
*/
+    if (xc_ia64_recv_shared_info(xc_handle, io_fd, dom, shared_info_frame,
+                                 NULL))
+        goto out;
+
+    /* vcpu map */
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        goto out;
+    }
+    if (!read_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+        ERROR("error reading max_virt_cpus");
+        goto out;
+    }
+    if (max_virt_cpus < info.max_vcpu_id) {
+        ERROR("too large max_virt_cpus %i < %i\n",
+              max_virt_cpus, info.max_vcpu_id);
+        goto out;
+    }
+    vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+                   sizeof(vcpumap[0]);
+    vcpumap = malloc(vcpumap_size);
+    if (vcpumap == NULL) {
+        ERROR("memory alloc for vcpumap");
+        goto out;
+    }
+    memset(vcpumap, 0, vcpumap_size);
+    if (!read_exact(io_fd, vcpumap, vcpumap_size)) {
+        ERROR("read vcpumap");
+        goto out;
+    }
+
+    /* vcpu context */
+    for (i = 0; i <= info.max_vcpu_id; i++) {
+        /* A copy of the CPU context of the guest. */
+        vcpu_guest_context_t ctxt;
+
+        if (!__test_bit(i, vcpumap))
+            continue;
+
+        if (xc_ia64_recv_vcpu_context(xc_handle, io_fd, dom, i, &ctxt))
+            goto out;
+
+        // system context of vcpu is received as hvm context.
+ } + + /* Set HVM-specific parameters */ + if (!read_exact(io_fd, magic_pfns, sizeof(magic_pfns))) { + ERROR("error reading magic page addresses"); + goto out; + } + + /* These comms pages need to be zeroed at the start of day */ + for (i = 0; i < NR_PARAMS; i++) { + rc = xc_clear_domain_page(xc_handle, dom, magic_pfns[i]); + if (rc != 0) { + ERROR("error zeroing magic pages: %i", rc); + goto out; + } + rc = xc_set_hvm_param(xc_handle, dom, hvm_params[i], magic_pfns[i]); + if (rc != 0) { + ERROR("error setting HVM params: %i", rc); + goto out; + } + } + rc = xc_set_hvm_param(xc_handle, dom, + HVM_PARAM_STORE_EVTCHN, store_evtchn); + if (rc != 0) { + ERROR("error setting HVM params: %i", rc); + goto out; + } + *store_mfn = magic_pfns[2]; + + /* Read HVM context */ + if (!read_exact(io_fd, &rec_size, sizeof(rec_size))) { + ERROR("error read hvm context size!\n"); + goto out; + } + + hvm_buf = malloc(rec_size); + if (hvm_buf == NULL) { + ERROR("memory alloc for hvm context buffer failed"); + errno = ENOMEM; + goto out; + } + + if (!read_exact(io_fd, hvm_buf, rec_size)) { + ERROR("error loading the HVM context"); + goto out; + } + + rc = xc_domain_hvm_setcontext(xc_handle, dom, hvm_buf, rec_size); + if (rc != 0) { + ERROR("error setting the HVM context"); + goto out; + } + + rc = 0; + +out: + if (vcpumap != NULL) + free(vcpumap); + if (hvm_buf != NULL) + free(hvm_buf); + return rc; +} + +/* + * hvm domain requires IO pages allocated when XEN_DOMCTL_arch_setup + */ +static int +xc_ia64_hvm_domain_setup(int xc_handle, uint32_t dom) +{ + int rc; + xen_pfn_t pfn_list[] = { + IO_PAGE_START >> PAGE_SHIFT, + BUFFER_IO_PAGE_START >> PAGE_SHIFT, + BUFFER_PIO_PAGE_START >> PAGE_SHIFT, + }; + unsigned long nr_pages = sizeof(pfn_list) / sizeof(pfn_list[0]); + + rc = xc_domain_memory_populate_physmap(xc_handle, dom, nr_pages, + 0, 0, &pfn_list[0]); + if (rc != 0) + PERROR("Could not allocate IO page or buffer io page.\n"); + return rc; +} + +int +xc_domain_restore(int xc_handle, 
int io_fd, uint32_t dom, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn, + unsigned int hvm, unsigned int pae) +{ + DECLARE_DOMCTL; + int rc = 1; + unsigned long ver; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + struct xen_ia64_p2m_table p2m_table; + xc_ia64_p2m_init(&p2m_table); + + /* For info only */ + nr_pfns = 0; + + if ( !read_exact(io_fd, &p2m_size, sizeof(unsigned long)) ) + { + ERROR("read: p2m_size"); + goto out; + } + DPRINTF("xc_linux_restore start: p2m_size = %lx\n", p2m_size); + + if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { + ERROR("Error when reading version"); + goto out; + } + if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) { + ERROR("version of save doesn't match"); + goto out; + } + + if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { + ERROR("read: domain setup"); + goto out; + } + + if (hvm && xc_ia64_hvm_domain_setup(xc_handle, dom) != 0) + goto out; + + /* Build firmware (will be overwritten). */ + domctl.domain = (domid_t)dom; + domctl.u.arch_setup.flags &= ~XEN_DOMAINSETUP_query; + domctl.u.arch_setup.bp = 0; /* indicate domain restore */ + + domctl.cmd = XEN_DOMCTL_arch_setup; + if (xc_domctl(xc_handle, &domctl)) + goto out; + + /* Get the domain's shared-info frame. 
*/ + domctl.cmd = XEN_DOMCTL_getdomaininfo; + domctl.domain = (domid_t)dom; + if (xc_domctl(xc_handle, &domctl) < 0) { + ERROR("Could not get information on new domain"); + goto out; + } + shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + + if (ver == XC_IA64_SR_FORMAT_VER_TWO) { + unsigned int memmap_info_num_pages; + unsigned long memmap_size; + xen_ia64_memmap_info_t *memmap_info; + + if (!read_exact(io_fd, &memmap_info_num_pages, + sizeof(memmap_info_num_pages))) { + ERROR("read: memmap_info_num_pages"); + goto out; + } + memmap_size = memmap_info_num_pages * PAGE_SIZE; + memmap_info = malloc(memmap_size); + if (memmap_info == NULL) { + ERROR("Could not allocate memory for memmap_info"); + goto out; + } + if (!read_exact(io_fd, memmap_info, memmap_size)) { + ERROR("read: memmap_info"); + goto out; + } + if (xc_ia64_p2m_map(&p2m_table, xc_handle, + dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) { + ERROR("p2m mapping"); + goto out; + } + free(memmap_info); + } else if (ver == XC_IA64_SR_FORMAT_VER_ONE) { + xen_ia64_memmap_info_t *memmap_info; + efi_memory_desc_t *memdesc; + uint64_t buffer[(sizeof(*memmap_info) + sizeof(*memdesc) + + sizeof(uint64_t) - 1) / sizeof(uint64_t)]; + + memset(buffer, 0, sizeof(buffer)); + memmap_info = (xen_ia64_memmap_info_t *)buffer; + memdesc = (efi_memory_desc_t*)&memmap_info->memdesc[0]; + memmap_info->efi_memmap_size = sizeof(*memmap_info) + sizeof(*memdesc); + memmap_info->efi_memdesc_size = sizeof(*memdesc); + memmap_info->efi_memdesc_version = EFI_MEMORY_DESCRIPTOR_VERSION; + + memdesc->type = EFI_MEMORY_DESCRIPTOR_VERSION; + memdesc->phys_addr = 0; + memdesc->virt_addr = 0; + memdesc->num_pages = nr_pfns << (PAGE_SHIFT - EFI_PAGE_SHIFT); + memdesc->attribute = EFI_MEMORY_WB; + + if (xc_ia64_p2m_map(&p2m_table, xc_handle, + dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) { + ERROR("p2m mapping"); + goto out; + } + } else { + ERROR("unknown version"); + goto out; + } + + DPRINTF("Reloading memory pages: 0%%\n"); + + 
while (1) { + unsigned long gmfn; + if (!read_exact(io_fd, &gmfn, sizeof(unsigned long))) { + ERROR("Error when reading batch size"); + goto out; + } + if (gmfn == INVALID_MFN) + break; + + if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) { + ERROR("can not populate page 0x%lx", gmfn); + goto out; + } + if (read_page(xc_handle, io_fd, dom, gmfn) < 0) + goto out; + } + + DPRINTF("Received all pages\n"); + + if (xc_ia64_recv_unallocated_list(xc_handle, io_fd, dom, &p2m_table)) + goto out; + + if (!hvm) + rc = xc_ia64_pv_recv_context(xc_handle, io_fd, dom, shared_info_frame, + &p2m_table, store_evtchn, store_mfn, + console_evtchn, console_mfn); + else + rc = xc_ia64_hvm_recv_context(xc_handle, io_fd, dom, shared_info_frame, + &p2m_table, store_evtchn, store_mfn, + console_evtchn, console_mfn); + if (rc) + goto out; /* * Safety checking of saved context: @@ -368,12 +594,10 @@ xc_domain_restore(int xc_handle, int io_ rc = 0; out: + xc_ia64_p2m_unmap(&p2m_table); + if ((rc != 0) && (dom != 0)) xc_domain_destroy(xc_handle, dom); - - xc_ia64_p2m_unmap(&p2m_table); - - unlock_pages(&ctxt, sizeof(ctxt)); DPRINTF("Restore exit with rc=%d\n", rc); diff -r 6f9435bb6a19 -r 8321f8577a60 tools/libxc/ia64/xc_ia64_linux_save.c --- a/tools/libxc/ia64/xc_ia64_linux_save.c Thu Oct 11 19:10:56 2007 +0900 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Thu Oct 11 19:16:10 2007 +0900 @@ -8,6 +8,7 @@ * * Copyright (c) 2007 Isaku Yamahata * Use foreign p2m exposure. + * VTi domain support. */ #include @@ -20,6 +21,7 @@ #include "xc_ia64.h" #include "xc_ia64_save_restore.h" #include "xc_efi.h" +#include "xen/hvm/params.h" /* ** Default values for important tuning parameters. Can override by passing @@ -35,14 +37,6 @@ ** During (live) save/migrate, we maintain a number of bitmaps to track ** which pages we have to send, and to skip. 
*/ - -#define BITS_PER_LONG (sizeof(unsigned long) * 8) - -#define BITMAP_ENTRY(_nr,_bmap) \ - ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] - -#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) - static inline int test_bit(int nr, volatile void * addr) { return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; @@ -136,6 +130,271 @@ retry: return -1; } +static inline int +md_is_not_ram(const efi_memory_desc_t *md) +{ + return ((md->type != EFI_CONVENTIONAL_MEMORY) || + (md->attribute != EFI_MEMORY_WB) || + (md->num_pages == 0)); +} + +/* + * Send through a list of all the PFNs that were not in map at the close. + * We send pages which was allocated. However balloon driver may + * decreased after sending page. So we have to check the freed + * page after pausing the domain. + */ +static int +xc_ia64_send_unallocated_list(int xc_handle, int io_fd, + struct xen_ia64_p2m_table *p2m_table, + xen_ia64_memmap_info_t *memmap_info, + void *memmap_desc_start, void *memmap_desc_end) +{ + void *p; + efi_memory_desc_t *md; + + unsigned long N; + unsigned long pfntab[1024]; + unsigned int j; + + j = 0; + for (p = memmap_desc_start; + p < memmap_desc_end; + p += memmap_info->efi_memdesc_size) { + md = p; + + if (md_is_not_ram(md)) + continue; + + for (N = md->phys_addr >> PAGE_SHIFT; + N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> + PAGE_SHIFT; + N++) { + if (!xc_ia64_p2m_allocated(p2m_table, N)) + j++; + } + } + if (!write_exact(io_fd, &j, sizeof(unsigned int))) { + ERROR("Error when writing to state file (6a)"); + return -1; + } + + j = 0; + for (p = memmap_desc_start; + p < memmap_desc_end; + p += memmap_info->efi_memdesc_size) { + md = p; + + if (md_is_not_ram(md)) + continue; + + for (N = md->phys_addr >> PAGE_SHIFT; + N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> + PAGE_SHIFT; + N++) { + if (!xc_ia64_p2m_allocated(p2m_table, N)) + pfntab[j++] = N; + if (j == sizeof(pfntab)/sizeof(pfntab[0])) { + if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * 
j)) { + ERROR("Error when writing to state file (6b)"); + return -1; + } + j = 0; + } + } + } + if (j > 0) { + if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) { + ERROR("Error when writing to state file (6c)"); + return -1; + } + } + + return 0; +} + +static int +xc_ia64_send_vcpu_context(int xc_handle, int io_fd, uint32_t dom, + uint32_t vcpu, vcpu_guest_context_t *ctxt) +{ + if (xc_vcpu_getcontext(xc_handle, dom, vcpu, ctxt)) { + ERROR("Could not get vcpu context"); + return -1; + } + + if (!write_exact(io_fd, ctxt, sizeof(*ctxt))) { + ERROR("Error when writing to state file (1)"); + return -1; + } + + fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]); + return 0; +} + +static int +xc_ia64_send_shared_info(int xc_handle, int io_fd, shared_info_t *live_shinfo) +{ + if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { + ERROR("Error when writing to state file (1)"); + return -1; + } + return 0; +} + +static int +xc_ia64_pv_send_context(int xc_handle, int io_fd, uint32_t dom, + shared_info_t *live_shinfo) +{ + /* A copy of the CPU context of the guest. 
*/
+    vcpu_guest_context_t ctxt;
+    char *mem;
+
+    if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, 0, &ctxt))
+        return -1;
+
+    mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                               PROT_READ|PROT_WRITE, ctxt.privregs_pfn);
+    if (mem == NULL) {
+        ERROR("cannot map privreg page");
+        return -1;
+    }
+    if (!write_exact(io_fd, mem, PAGE_SIZE)) {
+        ERROR("Error when writing privreg to state file (5)");
+        munmap(mem, PAGE_SIZE);
+        return -1;
+    }
+    munmap(mem, PAGE_SIZE);
+
+    if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+        return -1;
+
+    return 0;
+}
+
+static int
+xc_ia64_hvm_send_context(int xc_handle, int io_fd, uint32_t dom,
+                         const xc_dominfo_t *info, shared_info_t *live_shinfo)
+{
+    int rc = -1;
+    unsigned int i;
+
+    /* vcpu map */
+    uint64_t max_virt_cpus;
+    unsigned long vcpumap_size;
+    uint64_t *vcpumap = NULL;
+
+    /* HVM: magic frames for ioreqs and xenstore comms */
+    const int hvm_params[] = {
+        HVM_PARAM_IOREQ_PFN,
+        HVM_PARAM_BUFIOREQ_PFN,
+        HVM_PARAM_STORE_PFN,
+    };
+    const int NR_PARAMS = sizeof(hvm_params) / sizeof(hvm_params[0]);
+    /* ioreq_pfn, bufioreq_pfn, store_pfn */
+    uint64_t magic_pfns[NR_PARAMS];
+
+    /* HVM: a buffer for holding HVM context */
+    uint64_t rec_size;
+    uint64_t hvm_buf_size = 0;
+    uint8_t *hvm_buf = NULL;
+
+    if (xc_ia64_send_shared_info(xc_handle, io_fd, live_shinfo))
+        return -1;
+
+    /* vcpu map */
+    max_virt_cpus = MAX_VIRT_CPUS;
+    vcpumap_size = (max_virt_cpus + 1 + sizeof(vcpumap[0]) - 1) /
+                   sizeof(vcpumap[0]);
+    vcpumap = malloc(vcpumap_size);
+    if (vcpumap == NULL) {
+        ERROR("memory alloc for vcpumap");
+        goto out;
+    }
+    memset(vcpumap, 0, vcpumap_size);
+
+    for (i = 0; i <= info->max_vcpu_id; i++) {
+        xc_vcpuinfo_t vinfo;
+        if ((xc_vcpu_getinfo(xc_handle, dom, i, &vinfo) == 0) && vinfo.online)
+            __set_bit(i, vcpumap);
+    }
+
+    if (!write_exact(io_fd, &max_virt_cpus, sizeof(max_virt_cpus))) {
+        ERROR("write max_virt_cpus");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, vcpumap, vcpumap_size)) {
+        ERROR("write 
vcpumap"); + goto out; + } + + /* vcpu context */ + for (i = 0; i <= info->max_vcpu_id; i++) { + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + if (!__test_bit(i, vcpumap)) + continue; + + if (xc_ia64_send_vcpu_context(xc_handle, io_fd, dom, i, &ctxt)) + goto out; + + // system context of vcpu is sent as hvm context. + } + + /* Save magic-page locations. */ + memset(magic_pfns, 0, sizeof(magic_pfns)); + for (i = 0; i < NR_PARAMS; i++) { + if (xc_get_hvm_param(xc_handle, dom, hvm_params[i], &magic_pfns[i])) { + PERROR("Error when xc_get_hvm_param"); + goto out; + } + } + + if (!write_exact(io_fd, magic_pfns, sizeof(magic_pfns))) { + ERROR("Error when writing to state file (7)"); + goto out; + } + + /* Need another buffer for HVM context */ + hvm_buf_size = xc_domain_hvm_getcontext(xc_handle, dom, 0, 0); + if (hvm_buf_size == -1) { + ERROR("Couldn't get HVM context size from Xen"); + goto out; + } + + hvm_buf = malloc(hvm_buf_size); + if (!hvm_buf) { + ERROR("Couldn't allocate memory"); + goto out; + } + + /* Get HVM context from Xen and save it too */ + rec_size = xc_domain_hvm_getcontext(xc_handle, dom, hvm_buf, hvm_buf_size); + if (rec_size == -1) { + ERROR("HVM:Could not get hvm buffer"); + goto out; + } + + if (!write_exact(io_fd, &rec_size, sizeof(rec_size))) { + ERROR("error write hvm buffer size"); + goto out; + } + + if (!write_exact(io_fd, hvm_buf, rec_size)) { + ERROR("write HVM info failed!\n"); + goto out; + } + + rc = 0; +out: + if (hvm_buf != NULL) + free(hvm_buf); + if (vcpumap != NULL) + free(vcpumap); + return rc; +} + int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags, int (*suspend)(int), @@ -147,15 +406,11 @@ xc_domain_save(int xc_handle, int io_fd, int rc = 1; - //int live = (flags & XCFLAGS_LIVE); int debug = (flags & XCFLAGS_DEBUG); int live = (flags & XCFLAGS_LIVE); /* The new domain's shared-info frame number. 
*/ unsigned long shared_info_frame; - - /* A copy of the CPU context of the guest. */ - vcpu_guest_context_t ctxt; /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; @@ -185,6 +440,12 @@ xc_domain_save(int xc_handle, int io_fd, char *mem; + /* HVM: shared-memory bitmaps for getting log-dirty bits from qemu-dm */ + unsigned long *qemu_bitmaps[2]; + int qemu_active = 0; + int qemu_non_active = 1; + + /* for foreign p2m exposure */ unsigned int memmap_info_num_pages; unsigned long memmap_size = 0; xen_ia64_memmap_info_t *memmap_info_live = NULL; @@ -299,6 +560,14 @@ xc_domain_save(int xc_handle, int io_fd, goto out; } + if (hvm) { + /* Get qemu-dm logging dirty pages too */ + void *seg = init_qemu_maps(dom, bitmap_size); + qemu_bitmaps[0] = seg; + qemu_bitmaps[1] = seg + bitmap_size; + qemu_active = 0; + qemu_non_active = 1; + } } else { /* This is a non-live suspend. Issue the call back to get the @@ -374,9 +643,7 @@ xc_domain_save(int xc_handle, int io_fd, p < memmap_desc_end; p += memmap_info->efi_memdesc_size) { md = p; - if (md->type != EFI_CONVENTIONAL_MEMORY || - md->attribute != EFI_MEMORY_WB || - md->num_pages == 0) + if (md_is_not_ram(md)) continue; for (N = md->phys_addr >> PAGE_SHIFT; @@ -455,11 +722,27 @@ xc_domain_save(int xc_handle, int io_fd, goto out; } + if (hvm) { + unsigned int j; + /* Pull in the dirty bits from qemu-dm too */ + if (!last_iter) { + qemu_active = qemu_non_active; + qemu_non_active = qemu_active ? 
0 : 1; + qemu_flip_buffer(dom, qemu_active); + for (j = 0; j < bitmap_size / sizeof(unsigned long); j++) { + to_send[j] |= qemu_bitmaps[qemu_non_active][j]; + qemu_bitmaps[qemu_non_active][j] = 0; + } + } else { + for (j = 0; j < bitmap_size / sizeof(unsigned long); j++) + to_send[j] |= qemu_bitmaps[qemu_active][j]; + } + } + sent_last_iter = sent_this_iter; //print_stats(xc_handle, dom, sent_this_iter, &stats, 1); } - } fprintf(stderr, "All memory is saved\n"); @@ -473,100 +756,18 @@ xc_domain_save(int xc_handle, int io_fd, } } - /* - * Send through a list of all the PFNs that were not in map at the close. - * We send pages which was allocated. However balloon driver may - * decreased after sending page. So we have to check the freed - * page after pausing the domain. - */ - { - unsigned long N; - unsigned long pfntab[1024]; - unsigned int j; - - j = 0; - for (p = memmap_desc_start; - p < memmap_desc_end; - p += memmap_info->efi_memdesc_size) { - md = p; - if (md->type != EFI_CONVENTIONAL_MEMORY || - md->attribute != EFI_MEMORY_WB || - md->num_pages == 0) - continue; - for (N = md->phys_addr >> PAGE_SHIFT; - N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> - PAGE_SHIFT; - N++) { - if (!xc_ia64_p2m_allocated(&p2m_table, N)) - j++; - } - } - if (!write_exact(io_fd, &j, sizeof(unsigned int))) { - ERROR("Error when writing to state file (6a)"); - goto out; - } - - j = 0; - for (p = memmap_desc_start; - p < memmap_desc_end; - p += memmap_info->efi_memdesc_size) { - md = p; - if (md->type != EFI_CONVENTIONAL_MEMORY || - md->attribute != EFI_MEMORY_WB || - md->num_pages == 0) - continue; - for (N = md->phys_addr >> PAGE_SHIFT; - N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> - PAGE_SHIFT; - N++) { - if (!xc_ia64_p2m_allocated(&p2m_table, N)) - pfntab[j++] = N; - if (j == sizeof(pfntab)/sizeof(pfntab[0])) { - if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) { - ERROR("Error when writing to state file (6b)"); - goto out; - } - j = 0; - } - } - 
} - if (j > 0) { - if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) { - ERROR("Error when writing to state file (6b)"); - goto out; - } - } - } - - if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { - ERROR("Could not get vcpu context"); - goto out; - } - - if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { - ERROR("Error when writing to state file (1)"); - goto out; - } - - fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt.regs.ip, ctxt.regs.b[0]); - - mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ|PROT_WRITE, ctxt.privregs_pfn); - if (mem == NULL) { - ERROR("cannot map privreg page"); - goto out; - } - if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { - ERROR("Error when writing privreg to state file (5)"); - munmap(mem, PAGE_SIZE); - goto out; - } - munmap(mem, PAGE_SIZE); - - if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { - ERROR("Error when writing to state file (1)"); - goto out; - } + if (xc_ia64_send_unallocated_list(xc_handle, io_fd, &p2m_table, + memmap_info, + memmap_desc_start, memmap_desc_end)) + goto out; + + if (!hvm) + rc = xc_ia64_pv_send_context(xc_handle, io_fd, dom, live_shinfo); + else + rc = xc_ia64_hvm_send_context(xc_handle, io_fd, + dom, &info, live_shinfo); + if (rc) + goto out; /* Success! */ rc = 0; diff -r 6f9435bb6a19 -r 8321f8577a60 tools/libxc/ia64/xc_ia64_save_restore.h --- a/tools/libxc/ia64/xc_ia64_save_restore.h Thu Oct 11 19:10:56 2007 +0900 +++ b/tools/libxc/ia64/xc_ia64_save_restore.h Thu Oct 11 19:16:10 2007 +0900 @@ -31,6 +31,27 @@ #define XC_IA64_SR_FORMAT_VER_CURRENT XC_IA64_SR_FORMAT_VER_TWO +/* +** During (live) save/migrate, we maintain a number of bitmaps to track +** which pages we have to send, and to skip. 
+*/ +#define BITS_PER_LONG (sizeof(unsigned long) * 8) + +#define BITMAP_ENTRY(_nr,_bmap) \ + ((unsigned long *)(_bmap))[(_nr)/BITS_PER_LONG] + +#define BITMAP_SHIFT(_nr) ((_nr) % BITS_PER_LONG) + +static inline int __test_bit(int nr, void * addr) +{ + return (BITMAP_ENTRY(nr, addr) >> BITMAP_SHIFT(nr)) & 1; +} + +static inline void __set_bit(int nr, void * addr) +{ + BITMAP_ENTRY(nr, addr) |= (1UL << BITMAP_SHIFT(nr)); +} + #endif /* XC_IA64_SAVE_RESTORE_H */ /*