# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1193036269 -32400 # Node ID 977f5886e288f4c6764e5e12cb6492eff54a6bca # Parent 75280d0f64d5946bf0351a9f2cbc178b26696df1 clean up arch_get/set_info_guest() - update comment in copy_rbs() - don't warn when rbs_size = 0 for cpu initialization case. - remove struct vcpu_guest_context_regs::rbs_nat member which isn't used. and add num_phys_stacked to struct vcpu_guest_context_regs. so far rbs_nat and rbs_rnat aren't used, so it is allowed to change the offset of rbs_rnat. - Add check when setting vRR[]. - don't set vRR[] if the value is zero PATCHNAME: clean_up_arch_set_info_guest Signed-off-by: Isaku Yamahata diff -r 75280d0f64d5 -r 977f5886e288 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Tue Oct 16 18:48:05 2007 +0900 +++ b/xen/arch/ia64/xen/domain.c Mon Oct 22 15:57:49 2007 +0900 @@ -634,6 +634,7 @@ int arch_vcpu_reset(struct vcpu *v) return 0; } +/* Here it is assumed that all of the CPUs have the same RSE.N_STACKED_PHYS */ static unsigned long num_phys_stacked; static int __init init_num_phys_stacked(void) @@ -820,8 +821,9 @@ void arch_get_info_guest(struct vcpu *v, COPY_FPREG(&c.nat->regs.f[30], &sw->f30); COPY_FPREG(&c.nat->regs.f[31], &sw->f31); - for (i = 0; i < 96; i++) - COPY_FPREG(&c.nat->regs.f[i + 32], &v->arch._thread.fph[i]); + // f32 - f127 + memcpy(&c.nat->regs.f[32], &v->arch._thread.fph[0], + sizeof(v->arch._thread.fph)); #define NATS_UPDATE(reg) \ nats_update(&c.nat->regs.nats, (reg), \ @@ -937,6 +939,8 @@ void arch_get_info_guest(struct vcpu *v, c.nat->regs.rbs_rnat &= ~((1UL << bottom_slot) - 1); } + c.nat->regs.num_phys_stacked = num_phys_stacked; + c.nat->privregs_pfn = get_gpfn_from_mfn (virt_to_maddr(v->arch.privregs) >> PAGE_SHIFT); @@ -1096,7 +1100,6 @@ copy_rbs(struct vcpu* v, unsigned long* if (((unsigned long)dst_bsp & ~PAGE_MASK) > KERNEL_STACK_SIZE / 2) goto out; - //XXX TODO // ia64_copy_rbs() uses real cpu's stack register. 
// So it may fault with an Illigal Operation fault resulting // in panic if rbs_size is too large to load compared to @@ -1108,10 +1111,9 @@ copy_rbs(struct vcpu* v, unsigned long* // we need to copy them by hand without loadrs and flushrs // However even if we implement that, similar issue still occurs // when running guest. CPU context restore routine issues loadrs - // resulting in Illegal Operation fault. For such a case, - // we need to emulate RSE store. - // So it would be better to implement only RSE store emulation - // and copy stacked registers directly into guest RBS. + // resulting in Illegal Operation fault. And what if the vRSE is in + // enforced lazy mode? We can't store any dirty stacked registers + // into RBS without cover or br.call. if (num_regs > num_phys_stacked) { rc = -ENOSYS; gdprintk(XENLOG_WARNING, @@ -1235,10 +1237,6 @@ int arch_set_info_guest(struct vcpu *v, uregs->pr = c.nat->regs.pr; uregs->b0 = c.nat->regs.b[0]; - if (((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff) - gdprintk(XENLOG_INFO, - "rbs stack offset is different! xen 0x%x given 0x%x", - (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff); num_regs = ia64_rse_num_regs((unsigned long*)c.nat->regs.ar.bspstore, (unsigned long*)c.nat->regs.ar.bsp); rbs_size = (unsigned long)ia64_rse_skip_regs(rbs_bottom, num_regs) - @@ -1249,6 +1247,11 @@ int arch_set_info_guest(struct vcpu *v, rbs_size, sizeof (c.nat->regs.rbs)); return -EINVAL; } + if (rbs_size > 0 && + ((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff) + gdprintk(XENLOG_INFO, + "rbs stack offset is different! xen 0x%x given 0x%x", + (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff); /* Protection against crazy user code. */ if (!was_initialised) @@ -1274,6 +1277,49 @@ int arch_set_info_guest(struct vcpu *v, sw->ar_bspstore = (unsigned long)((char*)rbs_bottom + dst_rbs_size); } + } + + // inhibit save/restore between cpus of different RSE.N_STACKED_PHYS. + // to avoid nasty issues. 
+ // + // The number of physical stacked general register(RSE.N_STACKED_PHYS) + // isn't virtualized. Guest OS utilizes it via PAL_RSE_INFO call and + // the value might be exported to user/user process. + // (Linux does via /proc/cpuinfo) + // The SDM says only that the number is cpu implementation specific. + // + // If the number of restoring cpu is different from one of saving cpu, + // the following or something worse might happen. + // - Xen VMM itself may panic when issuing loadrs to run guest with + // illegal operation fault + // When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of + // restoring CPU + // This case is detected to refuse restore by rbs_copy() + // - guest kernel may panic with illegal operation fault + // When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of + // restoring CPU + // - information leak from guest kernel to user process + // When RSE.N_STACKED_PHYS of saving CPU < RSE.N_STACKED_PHYS of + // restoring CPU + // Before returning to user process, kernel should zero clear all + // physical stacked registers to prevent kernel bits leak. + // It would be based on RSE.N_STACKED_PHYS (Linux does.). + // On the restored environment the kernel clears only a part + // of the physical stacked registers. + // - user processes or human operators would be confused. + // RSE.N_STACKED_PHYS might be exported to user process or human + // operators. Actually on linux it is exported via /proc/cpuinfo. + // user processes might use it. + // I don't know any concrete example, but it's possible in theory. + // e.g. thread library may allocate RBS area based on the value. + // (Fortunately glibc nptl doesn't) + if (c.nat->regs.num_phys_stacked != 0 && /* COMPAT */ + c.nat->regs.num_phys_stacked != num_phys_stacked) { + gdprintk(XENLOG_WARNING, + "num phys stacked is different! 
" + "xen 0x%lx given 0x%lx", + num_phys_stacked, c.nat->regs.num_phys_stacked); + return -EINVAL; } uregs->r1 = c.nat->regs.r[1]; @@ -1337,9 +1383,9 @@ int arch_set_info_guest(struct vcpu *v, COPY_FPREG(&sw->f30, &c.nat->regs.f[30]); COPY_FPREG(&sw->f31, &c.nat->regs.f[31]); - for (i = 0; i < 96; i++) - COPY_FPREG(&v->arch._thread.fph[i], &c.nat->regs.f[i + 32]); - + // f32 - f127 + memcpy(&v->arch._thread.fph[0], &c.nat->regs.f[32], + sizeof(v->arch._thread.fph)); #define UNAT_UPDATE(reg) \ unat_update(&uregs->eml_unat, &uregs->r ## reg, \ @@ -1434,20 +1480,21 @@ int arch_set_info_guest(struct vcpu *v, /* rr[] must be set before setting itrs[] dtrs[] */ for (i = 0; i < 8; i++) { - //XXX TODO integrity check. - // if invalid value is given, - // vmx_load_all_rr() and load_region_regs() - // result in General exception, reserved register/field - // failt causing panicing xen. + unsigned long rrval = c.nat->regs.rr[i]; + unsigned long reg = (unsigned long)i << 61; + IA64FAULT fault = IA64_NO_FAULT; + + if (rrval == 0) + continue; if (d->arch.is_vti) { //without VGCF_EXTRA_REGS check, //VTi domain doesn't boot. if (c.nat->flags & VGCF_EXTRA_REGS) - vmx_vcpu_set_rr(v, (unsigned long)i << 61, - c.nat->regs.rr[i]); + fault = vmx_vcpu_set_rr(v, reg, rrval); } else - vcpu_set_rr(v, (unsigned long)i << 61, - c.nat->regs.rr[i]); + fault = vcpu_set_rr(v, reg, rrval); + if (fault != IA64_NO_FAULT) + return -EINVAL; } if (c.nat->flags & VGCF_EXTRA_REGS) { diff -r 75280d0f64d5 -r 977f5886e288 xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Tue Oct 16 18:48:05 2007 +0900 +++ b/xen/include/public/arch-ia64.h Mon Oct 22 15:57:49 2007 +0900 @@ -416,8 +416,14 @@ struct vcpu_guest_context_regs { */ unsigned int rbs_voff; unsigned long rbs[2048]; - unsigned long rbs_nat; unsigned long rbs_rnat; + + /* + * RSE.N_STACKED_PHYS via PAL_RSE_INFO + * Strictly this isn't cpu context, but this value is necessary + * for domain save/restore. So is here. 
+ */ + unsigned long num_phys_stacked; }; struct vcpu_guest_context {