# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1193036269 -32400 # Node ID 977f5886e288f4c6764e5e12cb6492eff54a6bca # Parent 75280d0f64d5946bf0351a9f2cbc178b26696df1 clean up arch_get/set_info_guest() - update comment in copy_rbs() - don't warn when rbs_size = 0 for cpu initialization case. - remove struct vcpu_guest_context_regs::rbs_nat member which isn't used. and add num_phys_stacked to struct vcpu_guest_context_regs. so far rbs_nat and rbs_rnat aren't used, so it is allowed to change the offset of rbs_rnat. - Add check when setting vRR[]. - don't set vRR[] if the value is zero PATCHNAME: clean_up_arch_set_info_guest Signed-off-by: Isaku Yamahata diff -r 75280d0f64d5 -r 977f5886e288 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Tue Oct 16 18:48:05 2007 +0900 +++ b/xen/arch/ia64/xen/domain.c Mon Oct 22 15:57:49 2007 +0900 @@ -634,6 +634,7 @@ int arch_vcpu_reset(struct vcpu *v) return 0; } +/* Here it is assumed that all of the CPUs have the same RSE.N_STACKED_PHYS */ static unsigned long num_phys_stacked; static int __init init_num_phys_stacked(void) @@ -820,8 +821,9 @@ void arch_get_info_guest(struct vcpu *v, COPY_FPREG(&c.nat->regs.f[30], &sw->f30); COPY_FPREG(&c.nat->regs.f[31], &sw->f31); - for (i = 0; i < 96; i++) - COPY_FPREG(&c.nat->regs.f[i + 32], &v->arch._thread.fph[i]); + // f32 - f127 + memcpy(&c.nat->regs.f[32], &v->arch._thread.fph[0], + sizeof(v->arch._thread.fph)); #define NATS_UPDATE(reg) \ nats_update(&c.nat->regs.nats, (reg), \ @@ -937,6 +939,8 @@ void arch_get_info_guest(struct vcpu *v, c.nat->regs.rbs_rnat &= ~((1UL << bottom_slot) - 1); } + c.nat->regs.num_phys_stacked = num_phys_stacked; + c.nat->privregs_pfn = get_gpfn_from_mfn (virt_to_maddr(v->arch.privregs) >> PAGE_SHIFT); @@ -1096,7 +1100,6 @@ copy_rbs(struct vcpu* v, unsigned long* if (((unsigned long)dst_bsp & ~PAGE_MASK) > KERNEL_STACK_SIZE / 2) goto out; - //XXX TODO // ia64_copy_rbs() uses real cpu's stack register. 
// So it may fault with an Illigal Operation fault resulting // in panic if rbs_size is too large to load compared to @@ -1108,10 +1111,9 @@ copy_rbs(struct vcpu* v, unsigned long* // we need to copy them by hand without loadrs and flushrs // However even if we implement that, similar issue still occurs // when running guest. CPU context restore routine issues loadrs - // resulting in Illegal Operation fault. For such a case, - // we need to emulate RSE store. - // So it would be better to implement only RSE store emulation - // and copy stacked registers directly into guest RBS. + // resulting in Illegal Operation fault. And what if the vRSE is in + // enforced lazy mode? We can't store any dirty stacked registers + // into RBS without cover or br.call. if (num_regs > num_phys_stacked) { rc = -ENOSYS; gdprintk(XENLOG_WARNING, @@ -1235,10 +1237,6 @@ int arch_set_info_guest(struct vcpu *v, uregs->pr = c.nat->regs.pr; uregs->b0 = c.nat->regs.b[0]; - if (((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff) - gdprintk(XENLOG_INFO, - "rbs stack offset is different! xen 0x%x given 0x%x", - (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff); num_regs = ia64_rse_num_regs((unsigned long*)c.nat->regs.ar.bspstore, (unsigned long*)c.nat->regs.ar.bsp); rbs_size = (unsigned long)ia64_rse_skip_regs(rbs_bottom, num_regs) - @@ -1249,6 +1247,11 @@ int arch_set_info_guest(struct vcpu *v, rbs_size, sizeof (c.nat->regs.rbs)); return -EINVAL; } + if (rbs_size > 0 && + ((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff) + gdprintk(XENLOG_INFO, + "rbs stack offset is different! xen 0x%x given 0x%x", + (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff); /* Protection against crazy user code. */ if (!was_initialised) @@ -1274,6 +1277,49 @@ int arch_set_info_guest(struct vcpu *v, sw->ar_bspstore = (unsigned long)((char*)rbs_bottom + dst_rbs_size); } + } + + // inhibit save/restore between cpus of different RSE.N_STACKED_PHYS. + // to avoid nasty issues. 
+ // + // The number of physical stacked general register(RSE.N_STACKED_PHYS) + // isn't virtualized. Guest OS utilizes it via PAL_RSE_INFO call and + // the value might be exported to user/user process. + // (Linux does via /proc/cpuinfo) + // The SDM says only that the number is cpu implementation specific. + // + // If the number of restoring cpu is different from one of saving cpu, + // the following or something worse might happen. + // - Xen VMM itself may panic when issuing loadrs to run guest with + // illegal operation fault + // When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of + // restoring CPU + // This case is detected to refuse restore by rbs_copy() + // - guest kernel may panic with illegal operation fault + // When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of + // restoring CPU + // - information leak from guest kernel to user process + // When RSE.N_STACKED_PHYS of saving CPU < RSE.N_STACKED_PHYS of + // restoring CPU + // Before returning to user process, kernel should zero clear all + // physical stacked registers to prevent kernel bits leak. + // It would be based on RSE.N_STACKED_PHYS (Linux does.). + // On the restored environment the kernel clears only a part + // of the physical stacked registers. + // - user processes or human operators would be confused. + // RSE.N_STACKED_PHYS might be exported to user process or human + // operators. Actually on linux it is exported via /proc/cpuinfo. + // user processes might use it. + // I don't know any concrete example, but it's possible in theory. + // e.g. thread library may allocate RBS area based on the value. + // (Fortunately glibc nptl doesn't) + if (c.nat->regs.num_phys_stacked != 0 && /* COMPAT */ + c.nat->regs.num_phys_stacked != num_phys_stacked) { + gdprintk(XENLOG_WARNING, + "num phys stacked is different! 
" + "xen 0x%lx given 0x%lx", + num_phys_stacked, c.nat->regs.num_phys_stacked); + return -EINVAL; } uregs->r1 = c.nat->regs.r[1]; @@ -1337,9 +1383,9 @@ int arch_set_info_guest(struct vcpu *v, COPY_FPREG(&sw->f30, &c.nat->regs.f[30]); COPY_FPREG(&sw->f31, &c.nat->regs.f[31]); - for (i = 0; i < 96; i++) - COPY_FPREG(&v->arch._thread.fph[i], &c.nat->regs.f[i + 32]); - + // f32 - f127 + memcpy(&v->arch._thread.fph[0], &c.nat->regs.f[32], + sizeof(v->arch._thread.fph)); #define UNAT_UPDATE(reg) \ unat_update(&uregs->eml_unat, &uregs->r ## reg, \ @@ -1434,20 +1480,21 @@ int arch_set_info_guest(struct vcpu *v, /* rr[] must be set before setting itrs[] dtrs[] */ for (i = 0; i < 8; i++) { - //XXX TODO integrity check. - // if invalid value is given, - // vmx_load_all_rr() and load_region_regs() - // result in General exception, reserved register/field - // failt causing panicing xen. + unsigned long rrval = c.nat->regs.rr[i]; + unsigned long reg = (unsigned long)i << 61; + IA64FAULT fault = IA64_NO_FAULT; + + if (rrval == 0) + continue; if (d->arch.is_vti) { //without VGCF_EXTRA_REGS check, //VTi domain doesn't boot. if (c.nat->flags & VGCF_EXTRA_REGS) - vmx_vcpu_set_rr(v, (unsigned long)i << 61, - c.nat->regs.rr[i]); + fault = vmx_vcpu_set_rr(v, reg, rrval); } else - vcpu_set_rr(v, (unsigned long)i << 61, - c.nat->regs.rr[i]); + fault = vcpu_set_rr(v, reg, rrval); + if (fault != IA64_NO_FAULT) + return -EINVAL; } if (c.nat->flags & VGCF_EXTRA_REGS) { diff -r 75280d0f64d5 -r 977f5886e288 xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Tue Oct 16 18:48:05 2007 +0900 +++ b/xen/include/public/arch-ia64.h Mon Oct 22 15:57:49 2007 +0900 @@ -416,8 +416,14 @@ struct vcpu_guest_context_regs { */ unsigned int rbs_voff; unsigned long rbs[2048]; - unsigned long rbs_nat; unsigned long rbs_rnat; + + /* + * RSE.N_STACKED_PHYS via PAL_RSE_INFO + * Strictly this isn't cpu context, but this value is necessary + * for domain save/restore. So is here. 
+ */ + unsigned long num_phys_stacked; }; struct vcpu_guest_context {