Xen 
 
Home About Xen.org Xen Xen Summit Wiki Mailing List Bug Tracker Xen Downloads
 
   
 

xen-changelog

[Xen-changelog] Merge.

# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID 81576d3d1ca891cdcd81fada9025b2279a974458
# Parent  99914b54f7bffc8c27757a1ac2bc7a0d97597ac8
# Parent  0608852073c86cfa432ac32cb9223531950be896
Merge.

diff -r 99914b54f7bf -r 81576d3d1ca8 Config.mk
--- a/Config.mk Thu Aug 18 18:40:02 2005
+++ b/Config.mk Fri Aug 19 18:19:28 2005
@@ -35,3 +35,11 @@
 
 # Choose the best mirror to download linux kernel
 KERNEL_REPO = http://www.kernel.org
+
+# ACM_USE_SECURITY_POLICY selects the security policy that Xen is built with.
+# Supported models are:
+#      ACM_NULL_POLICY (ACM will not be built with this policy)
+#      ACM_CHINESE_WALL_POLICY
+#      ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
+#      ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
+ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
diff -r 99914b54f7bf -r 81576d3d1ca8 docs/src/user.tex
--- a/docs/src/user.tex Thu Aug 18 18:40:02 2005
+++ b/docs/src/user.tex Fri Aug 19 18:19:28 2005
@@ -1763,7 +1763,7 @@
  physical address in the memory map will be ignored. This parameter
  may be specified with a B, K, M or G suffix, representing bytes,
  kilobytes, megabytes and gigabytes respectively. The
- default unit, if no suffix is specified, is bytes.
+ default unit, if no suffix is specified, is kilobytes.
 
 \item [dom0\_mem=xxx ] 
  Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Fri Aug 19 18:19:28 2005
@@ -14,8 +14,7 @@
 
 c-obj-y        := semaphore.o vm86.o \
                ptrace.o sys_i386.o \
-               i387.o dmi_scan.o bootflag.o \
-               doublefault.o
+               i387.o dmi_scan.o bootflag.o
 s-obj-y        :=
 
 obj-y                          += cpu/
@@ -85,7 +84,7 @@
                        $(obj)/vsyscall-sysenter.o FORCE
        $(call if_changed,syscall)
 
-c-link := init_task.o
+c-link :=
 s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c    Fri Aug 19 18:19:28 2005
@@ -569,7 +569,7 @@
        for (va = gdt_descr->address, f = 0;
             va < gdt_descr->address + gdt_descr->size;
             va += PAGE_SIZE, f++) {
-               frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+               frames[f] = virt_to_mfn(va);
                make_page_readonly((void *)va);
        }
        if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S  Fri Aug 19 18:19:28 2005
@@ -136,9 +136,6 @@
 ENTRY(empty_zero_page)
 
 .org 0x2000
-ENTRY(swapper_pg_dir)
-
-.org 0x3000
 ENTRY(cpu_gdt_table)
        .quad 0x0000000000000000        /* NULL descriptor */
        .quad 0x0000000000000000        /* 0x0b reserved */
@@ -190,10 +187,10 @@
        .quad 0x0000000000000000        /* 0xf8 - GDT entry 31: double-fault TSS */
        .fill GDT_ENTRIES-32,8,0
 
-.org 0x4000
+.org 0x3000
 ENTRY(default_ldt)
 
-.org 0x5000
+.org 0x4000
 /*
  * Real beginning of normal "text" segment
  */
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c        Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c        Fri Aug 19 18:19:28 2005
@@ -80,7 +80,7 @@
                t->io_bitmap_ptr = bitmap;
 
                op.cmd = PHYSDEVOP_SET_IOBITMAP;
-               op.u.set_iobitmap.bitmap   = (unsigned long)bitmap;
+               op.u.set_iobitmap.bitmap   = (char *)bitmap;
                op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
                HYPERVISOR_physdev_op(&op);
        }
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c   Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c   Fri Aug 19 18:19:28 2005
@@ -198,7 +198,7 @@
 {
        struct mm_struct * mm = current->mm;
        __u32 entry_1, entry_2, *lp;
-       unsigned long mach_lp;
+       maddr_t mach_lp;
        int error;
        struct user_desc ldt_info;
 
@@ -245,7 +245,8 @@
 
        /* Install the new entry ...  */
 install:
-       error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
+       error = HYPERVISOR_update_descriptor(
+               mach_lp, (u64)entry_1 | ((u64)entry_2<<32));
 
 out_unlock:
        up(&mm->context.sem);
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c       Fri Aug 19 18:19:28 2005
@@ -115,20 +115,12 @@
 /* We don't actually take CPU down, just spin without interrupts. */
 static inline void play_dead(void)
 {
-       /* Ack it */
-       __get_cpu_var(cpu_state) = CPU_DEAD;
-
-       /* We shouldn't have to disable interrupts while dead, but
-        * some interrupts just don't seem to go away, and this makes
-        * it "work" for testing purposes. */
        /* Death loop */
        while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
                HYPERVISOR_yield();
 
-       local_irq_disable();
        __flush_tlb_all();
        cpu_set(smp_processor_id(), cpu_online_map);
-       local_irq_enable();
 }
 #else
 static inline void play_dead(void)
@@ -156,12 +148,19 @@
                        rmb();
 
                        if (cpu_is_offline(cpu)) {
+                               local_irq_disable();
+                               /* Ack it.  From this point on until
+                                  we get woken up, we're not allowed
+                                  to take any locks.  In particular,
+                                  don't printk. */
+                               __get_cpu_var(cpu_state) = CPU_DEAD;
 #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
                                /* Tell hypervisor to take vcpu down. */
                                HYPERVISOR_vcpu_down(cpu);
 #endif
                                play_dead();
-         }
+                               local_irq_enable();
+                       }
 
                        __get_cpu_var(irq_stat).idle_timestamp = jiffies;
                        xen_idle();
@@ -523,16 +522,15 @@
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
         */
-#define C(i) do {                                                       \
-       if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||    \
-                    next->tls_array[i].b != prev->tls_array[i].b)) {   \
-               mcl->op      = __HYPERVISOR_update_descriptor;          \
-               mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu)  \
-                                        [GDT_ENTRY_TLS_MIN + i]);      \
-               mcl->args[1] = ((u32 *)&next->tls_array[i])[0];         \
-               mcl->args[2] = ((u32 *)&next->tls_array[i])[1];         \
-               mcl++;                                                  \
-       }                                                               \
+#define C(i) do {                                                      \
+       if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||    \
+                    next->tls_array[i].b != prev->tls_array[i].b)) {   \
+               mcl->op = __HYPERVISOR_update_descriptor;               \
+               *(u64 *)&mcl->args[0] = virt_to_machine(                \
+                       &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
+               *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i];    \
+               mcl++;                                                  \
+       }                                                               \
 } while (0)
        C(0); C(1); C(2);
 #undef C
@@ -549,7 +547,7 @@
                iobmp_op.cmd                     =
                        PHYSDEVOP_SET_IOBITMAP;
                iobmp_op.u.set_iobitmap.bitmap   =
-                       (unsigned long)next->io_bitmap_ptr;
+                       (char *)next->io_bitmap_ptr;
                iobmp_op.u.set_iobitmap.nr_ports =
                        next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op      = __HYPERVISOR_physdev_op;
@@ -791,3 +789,10 @@
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
 }
+
+
+#ifndef CONFIG_X86_SMP
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Aug 19 18:19:28 2005
@@ -1604,11 +1604,10 @@
        for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
        {       
             pfn_to_mfn_frame_list[j] = 
-                 virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+                 virt_to_mfn(&phys_to_machine_mapping[i]);
        }
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
-            virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
-
+            virt_to_mfn(pfn_to_mfn_frame_list);
 
        /*
         * NOTE: at this point the bootmem allocator is fully available.
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c       Fri Aug 19 18:19:28 2005
@@ -904,7 +904,7 @@
                for (va = cpu_gdt_descr[cpu].address, f = 0;
                     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
                     va += PAGE_SIZE, f++) {
-                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       ctxt.gdt_frames[f] = virt_to_mfn(va);
                        make_page_readonly((void *)va);
                }
                ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
@@ -920,7 +920,7 @@
        ctxt.failsafe_callback_cs  = __KERNEL_CS;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
 
-       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
+       ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
 
        boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
        printk("boot error: %ld\n", boot_error);
@@ -1616,3 +1616,21 @@
        smp_intr_init();
        local_setup_timer_irq();
 }
+
+DECLARE_PER_CPU(int, timer_irq);
+
+void _restore_vcpu(void)
+{
+       int cpu = smp_processor_id();
+       extern atomic_t vcpus_rebooting;
+
+       /* We are the first thing the vcpu runs when it comes back,
+          and we are supposed to restore the IPIs and timer
+          interrupts etc.  When we return, the vcpu's idle loop will
+          start up again. */
+       _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
+       _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
+       _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
+       _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
+       atomic_dec(&vcpus_rebooting);
+}
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c       Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c       Fri Aug 19 18:19:28 2005
@@ -94,9 +94,6 @@
                iotlb_nslabs = simple_strtoul(str, &str, 0) <<
                        (20 - IO_TLB_SHIFT);
                iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
-               /* Round up to power of two (xen_create_contiguous_region). */
-               while (iotlb_nslabs & (iotlb_nslabs-1))
-                       iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
        }
        if (*str == ',')
                ++str;
@@ -123,9 +120,6 @@
        if (!iotlb_nslabs) {
                iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
                iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
-               /* Round up to power of two (xen_create_contiguous_region). */
-               while (iotlb_nslabs & (iotlb_nslabs-1))
-                       iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
        }
 
        bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT);
@@ -135,10 +129,14 @@
         */
        iotlb_virt_start = alloc_bootmem_low_pages(bytes);
        if (!iotlb_virt_start)
-               panic("Cannot allocate SWIOTLB buffer");
-
-       xen_create_contiguous_region(
-               (unsigned long)iotlb_virt_start, get_order(bytes));
+               panic("Cannot allocate SWIOTLB buffer!\n"
+                     "Use dom0_mem Xen boot parameter to reserve\n"
+                     "some DMA memory (e.g., dom0_mem=-128M).\n");
+
+       for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE)
+               xen_create_contiguous_region(
+                       (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT),
+                       get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT));
 
        iotlb_virt_end = iotlb_virt_start + bytes;
 
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c  Fri Aug 19 18:19:28 2005
@@ -745,7 +745,7 @@
 #endif
 
 /* Dynamically-mapped IRQ. */
-static DEFINE_PER_CPU(int, timer_irq);
+DEFINE_PER_CPU(int, timer_irq);
 
 static struct irqaction irq_timer = {
        timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c      Fri Aug 19 18:19:28 2005
@@ -342,11 +342,15 @@
 extern void __init remap_numa_kva(void);
 #endif
 
+pgd_t *swapper_pg_dir;
+
 static void __init pagetable_init (void)
 {
        unsigned long vaddr;
-       pgd_t *pgd_base = swapper_pg_dir;
-       pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
+       pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
+
+       swapper_pg_dir = pgd_base;
+       init_mm.pgd    = pgd_base;
 
 #ifdef CONFIG_X86_PAE
        int i;
@@ -366,44 +370,6 @@
                __PAGE_KERNEL |= _PAGE_GLOBAL;
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }
-
-       /*
-        * Switch to proper mm_init page directory. Initialise from the current
-        * page directory, write-protect the new page directory, then switch to
-        * it. We clean up by write-enabling and then freeing the old page dir.
-        */
-#ifndef CONFIG_X86_PAE
-       memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
-       make_page_readonly(pgd_base);
-       xen_pgd_pin(__pa(pgd_base));
-       load_cr3(pgd_base);
-       xen_pgd_unpin(__pa(old_pgd));
-       make_page_writable(old_pgd);
-       __flush_tlb_all();
-       free_bootmem(__pa(old_pgd), PAGE_SIZE);
-#else
-       {
-               pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
-               pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
-               pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
-
-               memcpy(new_pmd,  old_pmd, PAGE_SIZE);
-               memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
-               set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
-
-               make_page_readonly(new_pmd);
-               make_page_readonly(pgd_base);
-               xen_pgd_pin(__pa(pgd_base));
-               load_cr3(pgd_base);
-               xen_pgd_unpin(__pa(old_pgd));
-               make_page_writable(old_pgd);
-               make_page_writable(old_pmd);
-               __flush_tlb_all();
-
-               free_bootmem(__pa(old_pgd), PAGE_SIZE);
-               free_bootmem(__pa(old_pmd), PAGE_SIZE);
-       }
-#endif
 
        init_mm.context.pinned = 1;
        kernel_physical_mapping_init(pgd_base);
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c   Fri Aug 19 18:19:28 2005
@@ -306,7 +306,7 @@
 {
        mmu_update_t **v = (mmu_update_t **)data;
 
-       (*v)->ptr = ((physaddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+       (*v)->ptr = ((maddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
                     PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
        (*v)++;
 
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c   Fri Aug 19 18:19:28 2005
@@ -170,7 +170,7 @@
        __flush_tlb_one(vaddr);
 }
 
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
 {
        unsigned long address = __fix_to_virt(idx);
 
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c     Fri Aug 19 18:19:28 2005
@@ -144,7 +144,7 @@
     vcpu_info_t   *vcpu_info = &s->vcpu_data[cpu];
 
     vcpu_info->evtchn_upcall_pending = 0;
-    
+
     /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
     l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
     while ( l1 != 0 )
@@ -158,9 +158,9 @@
             l2 &= ~(1 << l2i);
             
             port = (l1i << 5) + l2i;
-            if ( (irq = evtchn_to_irq[port]) != -1 )
+            if ( (irq = evtchn_to_irq[port]) != -1 ) {
                 do_IRQ(irq, regs);
-            else
+           } else
                 evtchn_device_upcall(port);
         }
     }
@@ -243,6 +243,74 @@
     }
 
     spin_unlock(&irq_mapping_update_lock);
+}
+
+/* This is only used when restoring a vcpu after an xm save.  The ipi is
+   expected to have been bound before we suspended, and so all of the
+   xenolinux state is set up; we only need to restore the Xen side of
+   things.  The irq number has to be the same, but the evtchn number can
+   change. */
+void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
+{
+    evtchn_op_t op;
+    int evtchn;
+
+    spin_lock(&irq_mapping_update_lock);
+
+    op.cmd = EVTCHNOP_bind_ipi;
+    if ( HYPERVISOR_event_channel_op(&op) != 0 )
+       panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
+    evtchn = op.u.bind_ipi.port;
+
+    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
+          ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
+          evtchn);
+
+    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+    irq_to_evtchn[irq] = -1;
+
+    evtchn_to_irq[evtchn] = irq;
+    irq_to_evtchn[irq]    = evtchn;
+
+    printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
+          evtchn_to_irq[evtchn]);
+    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
+
+    bind_evtchn_to_cpu(evtchn, vcpu);
+
+    spin_unlock(&irq_mapping_update_lock);
+
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
+}
+
+void _bind_virq_to_irq(int virq, int cpu, int irq)
+{
+    evtchn_op_t op;
+    int evtchn;
+
+    spin_lock(&irq_mapping_update_lock);
+
+    op.cmd              = EVTCHNOP_bind_virq;
+    op.u.bind_virq.virq = virq;
+    if ( HYPERVISOR_event_channel_op(&op) != 0 )
+            panic("Failed to bind virtual IRQ %d\n", virq);
+    evtchn = op.u.bind_virq.port;
+
+    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+    irq_to_evtchn[irq] = -1;
+
+    evtchn_to_irq[evtchn] = irq;
+    irq_to_evtchn[irq]    = evtchn;
+
+    per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+    bind_evtchn_to_cpu(evtchn, cpu);
+
+    spin_unlock(&irq_mapping_update_lock);
+
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
 }
 
 int bind_ipi_to_irq(int ipi)
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Fri Aug 19 18:19:28 2005
@@ -16,6 +16,8 @@
 #include <asm-xen/queues.h>
 #include <asm-xen/xenbus.h>
 #include <asm-xen/ctrl_if.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
 
 #define SHUTDOWN_INVALID  -1
 #define SHUTDOWN_POWEROFF  0
@@ -58,20 +60,74 @@
 /* Ignore multiple shutdown requests. */
 static int shutting_down = SHUTDOWN_INVALID;
 
-static void __do_suspend(void)
+#ifndef CONFIG_HOTPLUG_CPU
+#define cpu_down(x) (-EOPNOTSUPP)
+#define cpu_up(x) (-EOPNOTSUPP)
+#endif
+
+static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int r;
+    int gdt_pages;
+    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+    if (r != 0)
+       panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+    /* Translate from machine to physical addresses where necessary,
+       so that they can be translated to our new machine address space
+       after resume.  libxc is responsible for doing this to vcpu0,
+       but we do it to the others. */
+    gdt_pages = (ctxt->gdt_ents + 511) / 512;
+    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+       ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+void _restore_vcpu(int cpu);
+
+atomic_t vcpus_rebooting;
+
+static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+    int r;
+    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+    /* This is kind of a hack, and implicitly relies on the fact that
+       the vcpu stops in a place where all of the call clobbered
+       registers are already dead. */
+    ctxt->user_regs.esp -= 4;
+    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
+    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
+
+    /* De-canonicalise.  libxc handles this for vcpu 0, but we need
+       to do it for the other vcpus. */
+    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+    for (r = 0; r < gdt_pages; r++)
+       ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+    atomic_set(&vcpus_rebooting, 1);
+    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+    if (r != 0) {
+       printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+       return -1;
+    }
+
+    /* Make sure we wait for the new vcpu to come up before trying to do
+       anything with it or starting the next one. */
+    while (atomic_read(&vcpus_rebooting))
+       barrier();
+
+    return 0;
+}
+
+static int __do_suspend(void *ignore)
 {
     int i, j;
     suspend_record_t *suspend_record;
+    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
 
     /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
        /* XXX SMH: yes it would :-( */ 
-#ifdef CONFIG_XEN_BLKDEV_FRONTEND
-    extern void blkdev_suspend(void);
-    extern void blkdev_resume(void);
-#else
-#define blkdev_suspend() do{}while(0)
-#define blkdev_resume()  do{}while(0)
-#endif
 
 #ifdef CONFIG_XEN_NETDEV_FRONTEND
     extern void netif_suspend(void);
@@ -104,13 +160,63 @@
     extern unsigned long max_pfn;
     extern unsigned int *pfn_to_mfn_frame_list;
 
+    cpumask_t prev_online_cpus, prev_present_cpus;
+    int err = 0;
+
+    BUG_ON(smp_processor_id() != 0);
+    BUG_ON(in_interrupt());
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
+    if (num_online_cpus() > 1) {
+       printk(KERN_WARNING "Can't suspend SMP guests without CONFIG_HOTPLUG_CPU\n");
+       return -EOPNOTSUPP;
+    }
+#endif
+
     suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
     if ( suspend_record == NULL )
         goto out;
 
+    /* Take all of the other cpus offline.  We need to be careful not
+       to get preempted between the final test for num_online_cpus()
+       == 1 and disabling interrupts, since otherwise userspace could
+       bring another cpu online, and then we'd be stuffed.  At the
+       same time, cpu_down can reschedule, so we need to enable
+       preemption while doing that.  This kind of sucks, but should be
+       correct. */
+    /* (We don't need to worry about other cpus bringing stuff up,
+       since by the time num_online_cpus() == 1, there aren't any
+       other cpus) */
+    cpus_clear(prev_online_cpus);
+    preempt_disable();
+    while (num_online_cpus() > 1) {
+       preempt_enable();
+       for_each_online_cpu(i) {
+           if (i == 0)
+               continue;
+           err = cpu_down(i);
+           if (err != 0) {
+               printk(KERN_CRIT "Failed to take all CPUs down: %d.\n", err);
+               goto out_reenable_cpus;
+           }
+           cpu_set(i, prev_online_cpus);
+       }
+       preempt_disable();
+    }
+
     suspend_record->nr_pfns = max_pfn; /* final number of pfns */
 
     __cli();
+
+    preempt_enable();
+
+    cpus_clear(prev_present_cpus);
+    for_each_present_cpu(i) {
+       if (i == 0)
+           continue;
+       save_vcpu_context(i, &suspended_cpu_records[i]);
+       cpu_set(i, prev_present_cpus);
+    }
 
 #ifdef __i386__
     mm_pin_all();
@@ -119,8 +225,6 @@
 
     netif_suspend();
 
-    blkdev_suspend();
-
     time_suspend();
 
 #ifdef CONFIG_SMP
@@ -141,7 +245,9 @@
     memcpy(&suspend_record->resume_info, &xen_start_info,
            sizeof(xen_start_info));
 
-    HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
+    /* We'll stop somewhere inside this hypercall.  When it returns,
+       we'll start resuming after the restore. */
+    HYPERVISOR_suspend(virt_to_mfn(suspend_record));
 
     shutting_down = SHUTDOWN_INVALID; 
 
@@ -157,10 +263,10 @@
     for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
     {
         pfn_to_mfn_frame_list[j] = 
-            virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+            virt_to_mfn(&phys_to_machine_mapping[i]);
     }
     HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
-        virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+        virt_to_mfn(pfn_to_mfn_frame_list);
 
     gnttab_resume();
 
@@ -176,17 +282,30 @@
 
     time_resume();
 
-    blkdev_resume();
-
     netif_resume();
 
     usbif_resume();
 
+    for_each_cpu_mask(i, prev_present_cpus) {
+       restore_vcpu_context(i, &suspended_cpu_records[i]);
+    }
+
     __sti();
+
+ out_reenable_cpus:
+    for_each_cpu_mask(i, prev_online_cpus) {
+       j = cpu_up(i);
+       if (j != 0) {
+           printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
+                  i, j);
+           err = j;
+       }
+    }
 
  out:
     if ( suspend_record != NULL )
         free_page((unsigned long)suspend_record);
+    return err;
 }
 
 static int shutdown_process(void *__unused)
@@ -233,6 +352,18 @@
     return 0;
 }
 
+static struct task_struct *kthread_create_on_cpu(int (*f)(void *arg),
+                                                void *arg,
+                                                const char *name,
+                                                int cpu)
+{
+    struct task_struct *p;
+    p = kthread_create(f, arg, name);
+    kthread_bind(p, cpu);
+    wake_up_process(p);
+    return p;
+}
+
 static void __shutdown_handler(void *unused)
 {
     int err;
@@ -245,7 +376,7 @@
     }
     else
     {
-        __do_suspend();
+       kthread_create_on_cpu(__do_suspend, NULL, "suspender", 0);
     }
 }
 
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c     Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c     Fri Aug 19 18:19:28 2005
@@ -453,8 +453,8 @@
 #define C(i) do {                                                      \
        if (unlikely(next->tls_array[i] != prev->tls_array[i])) {       \
                mcl->op      = __HYPERVISOR_update_descriptor;          \
-               mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu)  \
-                                              [GDT_ENTRY_TLS_MIN + i]); \
+               mcl->args[0] = virt_to_machine(                         \
+                       &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
                mcl->args[1] = next->tls_array[i];                      \
                mcl++;                                                  \
        }                                                               \
@@ -474,7 +474,7 @@
                iobmp_op.cmd                     =
                        PHYSDEVOP_SET_IOBITMAP;
                iobmp_op.u.set_iobitmap.bitmap   =
-                       (unsigned long)next->io_bitmap_ptr;
+                       (char *)next->io_bitmap_ptr;
                iobmp_op.u.set_iobitmap.nr_ports =
                        next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op      = __HYPERVISOR_physdev_op;
@@ -743,3 +743,9 @@
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
 }
+
+#ifndef CONFIG_SMP
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c       Fri Aug 19 18:19:28 2005
@@ -795,7 +795,7 @@
                for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
                {       
                        pfn_to_mfn_frame_list[j] = 
-                               virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+                               virt_to_mfn(&phys_to_machine_mapping[i]);
                }
 
        }
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c     Fri Aug 19 18:19:28 2005
@@ -139,7 +139,7 @@
        for (va = gdt_descr->address, f = 0;
             va < gdt_descr->address + gdt_descr->size;
             va += PAGE_SIZE, f++) {
-               frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+               frames[f] = virt_to_mfn(va);
                make_page_readonly((void *)va);
        }
        if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
@@ -280,19 +280,16 @@
        if (cpu) {
                memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
        }       
-#endif
 
        cpu_gdt_descr[cpu].size = GDT_SIZE;
        cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
 
-        cpu_gdt_init(&cpu_gdt_descr[cpu]);
-
-#ifndef CONFIG_XEN 
        memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
-
 #else
        memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
            GDT_ENTRY_TLS_ENTRIES * 8);
+
+    cpu_gdt_init(&cpu_gdt_descr[cpu]);
 #endif
        
        /*
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c     Fri Aug 19 18:19:28 2005
@@ -780,7 +780,7 @@
                for (va = cpu_gdt_descr[cpu].address, f = 0;
                     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
                     va += PAGE_SIZE, f++) {
-                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       ctxt.gdt_frames[f] = virt_to_mfn(va);
                        make_page_readonly((void *)va);
                }
                ctxt.gdt_ents = GDT_ENTRIES;
@@ -795,7 +795,7 @@
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
        ctxt.syscall_callback_eip  = (unsigned long)system_call;
 
-       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+       ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
 
        boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
 
@@ -1286,4 +1286,10 @@
        smp_intr_init();
        local_setup_timer_irq();
 }
-#endif
+
+void _restore_vcpu(void)
+{
+       /* XXX need to write this */
+}
+
+#endif
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c    Fri Aug 19 18:19:28 2005
@@ -742,7 +742,7 @@
                                set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
                        else
                                __set_fixmap(FIX_ISAMAP_BEGIN - i,
-                                            virt_to_machine(empty_zero_page),
+                                            virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
                                             PAGE_KERNEL_RO);
        }
 #endif
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Fri Aug 19 18:19:28 2005
@@ -1,2 +1,2 @@
 
-obj-y  := blkback.o control.o interface.o vbd.o
+obj-y  := blkback.o xenbus.o interface.o vbd.o
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c        Fri Aug 19 18:19:28 2005
@@ -104,7 +104,6 @@
 #endif
 
 static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
 static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
 static void make_response(blkif_t *blkif, unsigned long id, 
                           unsigned short op, int st);
@@ -349,10 +348,6 @@
             dispatch_rw_block_io(blkif, req);
             break;
 
-        case BLKIF_OP_PROBE:
-            dispatch_probe(blkif, req);
-            break;
-
         default:
             DPRINTK("error: unknown block io operation [%d]\n",
                     req->operation);
@@ -363,66 +358,6 @@
 
     blk_ring->req_cons = i;
     return more_to_do;
-}
-
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
-{
-    int rsp = BLKIF_RSP_ERROR;
-    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
-    /* We expect one buffer only. */
-    if ( unlikely(req->nr_segments != 1) )
-        goto out;
-
-    /* Make sure the buffer is page-sized. */
-    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
-         (blkif_last_sect(req->frame_and_sects[0]) != ((PAGE_SIZE/512)-1)) )
-        goto out;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    {
-        struct gnttab_map_grant_ref map;
-
-        map.host_addr = MMAP_VADDR(pending_idx, 0);
-        map.flags = GNTMAP_host_map;
-        map.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
-        map.dom = blkif->domid;
-
-        if ( unlikely(HYPERVISOR_grant_table_op(
-                        GNTTABOP_map_grant_ref, &map, 1)))
-            BUG();
-
-        if ( map.handle < 0 )
-            goto out;
-
-        pending_handle(pending_idx, 0) = map.handle;
-    }
-#else /* else CONFIG_XEN_BLKDEV_GRANT */
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
-    /* Grab the real frontend out of the probe message. */
-    if (req->frame_and_sects[1] == BLKTAP_COOKIE) 
-        blkif->is_blktap = 1;
-#endif
-
-
-    if ( HYPERVISOR_update_va_mapping_otherdomain(
-        MMAP_VADDR(pending_idx, 0),
-        pfn_pte_ma(req->frame_and_sects[0] >> PAGE_SHIFT, PAGE_KERNEL),
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
-        0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
-#else
-        0, blkif->domid) )
-#endif
-        goto out;
-#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
-   
-    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
-                    PAGE_SIZE / sizeof(vdisk_t));
-
- out:
-    fast_flush_area(pending_idx, 1);
-    make_response(blkif, req->id, req->operation, rsp);
 }
 
 static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
@@ -460,7 +395,7 @@
         goto bad_descriptor;
     }
 
-    preq.dev           = req->device;
+    preq.dev           = req->handle;
     preq.sector_number = req->sector_number;
     preq.nr_sects      = 0;
 
@@ -730,8 +665,8 @@
         0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 #endif
 
-    blkif_ctrlif_init();
-    
+    blkif_xenbus_init();
+
 #ifdef CONFIG_XEN_BLKDEV_GRANT
     memset( pending_grant_handles,  BLKBACK_INVALID_HANDLE, MMAP_PAGES );
     printk(KERN_ALERT "Blkif backend is using grant tables.\n");
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Fri Aug 19 18:19:28 2005
@@ -13,7 +13,6 @@
 #include <asm/io.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
 #include <asm-xen/evtchn.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/xen-public/io/blkif.h>
@@ -47,6 +46,7 @@
     /* Physical parameters of the comms window. */
     unsigned long     shmem_frame;
     unsigned int      evtchn;
+    unsigned int      remote_evtchn;
     /* Comms information. */
     blkif_back_ring_t blk_ring;
     /* VBDs attached to this interface. */
@@ -71,7 +71,7 @@
     struct work_struct work;
 #ifdef CONFIG_XEN_BLKDEV_GRANT
     u16 shmem_handle;
-    memory_t shmem_vaddr;
+    unsigned long shmem_vaddr;
     grant_ref_t shmem_ref;
 #endif
 } blkif_t;
@@ -81,17 +81,29 @@
 void blkif_connect(blkif_be_connect_t *connect);
 int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
 void blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+blkif_t *blkif_find(domid_t domid);
+void free_blkif(blkif_t *blkif);
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
+
 #define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
 #define blkif_put(_b)                             \
     do {                                          \
         if ( atomic_dec_and_test(&(_b)->refcnt) ) \
-            blkif_disconnect_complete(_b);        \
+            free_blkif(_b);                      \
     } while (0)
 
-void vbd_create(blkif_be_vbd_create_t *create); 
+struct vbd;
+void vbd_free(blkif_t *blkif, struct vbd *vbd);
+
+/* Creates inactive vbd. */
+struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly);
+int vbd_is_active(struct vbd *vbd);
+void vbd_activate(blkif_t *blkif, struct vbd *vbd);
+
+unsigned long vbd_size(struct vbd *vbd);
+unsigned int vbd_info(struct vbd *vbd);
+unsigned long vbd_secsize(struct vbd *vbd);
 void vbd_destroy(blkif_be_vbd_destroy_t *delete); 
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
 void destroy_all_vbds(blkif_t *blkif);
 
 struct phys_req {
@@ -104,9 +116,10 @@
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
 
 void blkif_interface_init(void);
-void blkif_ctrlif_init(void);
 
 void blkif_deschedule(blkif_t *blkif);
+
+void blkif_xenbus_init(void);
 
 irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c      Fri Aug 19 18:19:28 2005
@@ -7,24 +7,135 @@
  */
 
 #include "common.h"
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #define VMALLOC_VMADDR(x) ((unsigned long)(x))
 #endif
 
 #define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1))
 
 static kmem_cache_t *blkif_cachep;
 static blkif_t      *blkif_hash[BLKIF_HASHSZ];
 
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
-    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif != NULL) && 
-            ((blkif->domid != domid) || (blkif->handle != handle)) )
+blkif_t *blkif_find(domid_t domid)
+{
+    blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)];
+
+    while (blkif) {
+       if (blkif->domid == domid) {
+           blkif_get(blkif);
+           return blkif;
+       }
         blkif = blkif->hash_next;
+    }
+
+    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+    if (!blkif)
+           return ERR_PTR(-ENOMEM);
+
+    memset(blkif, 0, sizeof(*blkif));
+    blkif->domid = domid;
+    blkif->status = DISCONNECTED;
+    spin_lock_init(&blkif->vbd_lock);
+    spin_lock_init(&blkif->blk_ring_lock);
+    atomic_set(&blkif->refcnt, 1);
+
+    blkif->hash_next = blkif_hash[BLKIF_HASH(domid)];
+    blkif_hash[BLKIF_HASH(domid)] = blkif;
     return blkif;
+}
+
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
+                            unsigned long shared_page)
+{
+    return direct_remap_area_pages(&init_mm, localaddr,
+                                  shared_page<<PAGE_SHIFT, PAGE_SIZE,
+                                  __pgprot(_KERNPG_TABLE), blkif->domid);
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+}
+#else
+static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
+                            unsigned long shared_page)
+{
+    struct gnttab_map_grant_ref op;
+    op.host_addr = localaddr;
+    op.flags = GNTMAP_host_map;
+    op.ref = shared_page;
+    op.dom = blkif->domid;
+       
+    BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+
+    if (op.handle < 0) {
+       DPRINTK(" Grant table operation failure !\n");
+       return op.handle;
+    }
+
+    blkif->shmem_ref = shared_page;
+    blkif->shmem_handle = op.handle;
+    blkif->shmem_vaddr = localaddr;
+    return 0;
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+    struct gnttab_unmap_grant_ref op;
+
+    op.host_addr = blkif->shmem_vaddr;
+    op.handle = blkif->shmem_handle;
+    op.dev_bus_addr = 0;
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+}
+#endif /* CONFIG_XEN_BLKDEV_GRANT */
+
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
+{
+    struct vm_struct *vma;
+    blkif_sring_t *sring;
+    evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+    int err;
+
+    BUG_ON(blkif->remote_evtchn);
+
+    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+       return -ENOMEM;
+
+    err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), shared_page);
+    if (err) {
+        vfree(vma->addr);
+       return err;
+    }
+
+    op.u.bind_interdomain.dom1 = DOMID_SELF;
+    op.u.bind_interdomain.dom2 = blkif->domid;
+    op.u.bind_interdomain.port1 = 0;
+    op.u.bind_interdomain.port2 = evtchn;
+    err = HYPERVISOR_event_channel_op(&op);
+    if (err) {
+       unmap_frontend_page(blkif);
+       vfree(vma->addr);
+       return err;
+    }
+
+    blkif->evtchn = op.u.bind_interdomain.port1;
+    blkif->remote_evtchn = evtchn;
+
+    sring = (blkif_sring_t *)vma->addr;
+    SHARED_RING_INIT(sring);
+    BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+
+    bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
+                             blkif);
+    blkif->status        = CONNECTED;
+    blkif->shmem_frame   = shared_page;
+
+    return 0;
 }
 
 static void __blkif_disconnect_complete(void *arg)
@@ -32,21 +143,13 @@
     blkif_t              *blkif = (blkif_t *)arg;
     ctrl_msg_t            cmsg;
     blkif_be_disconnect_t disc;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    struct gnttab_unmap_grant_ref op;
-#endif
 
     /*
      * These can't be done in blkif_disconnect() because at that point there
      * may be outstanding requests at the disc whose asynchronous responses
      * must still be notified to the remote driver.
      */
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    op.host_addr      = blkif->shmem_vaddr;
-    op.handle         = blkif->shmem_handle;
-    op.dev_bus_addr   = 0;
-    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-#endif
+    unmap_frontend_page(blkif);
     vfree(blkif->blk_ring.sring);
 
     /* Construct the deferred response message. */
@@ -81,200 +184,35 @@
     schedule_work(&blkif->work);
 }
 
-void blkif_create(blkif_be_create_t *create)
-{
-    domid_t       domid  = create->domid;
-    unsigned int  handle = create->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+void free_blkif(blkif_t *blkif)
+{
+    blkif_t     **pblkif;
+    evtchn_op_t op = { .cmd = EVTCHNOP_close };
+
+    op.u.close.port = blkif->evtchn;
+    op.u.close.dom = DOMID_SELF;
+    HYPERVISOR_event_channel_op(&op);
+    op.u.close.port = blkif->remote_evtchn;
+    op.u.close.dom = blkif->domid;
+    HYPERVISOR_event_channel_op(&op);
+
+    if (blkif->evtchn)
+        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+
+    if (blkif->blk_ring.sring) {
+       unmap_frontend_page(blkif);
+       vfree(blkif->blk_ring.sring);
+    }
+
+    pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)];
+    while ( *pblkif != blkif )
     {
-        DPRINTK("Could not create blkif: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    memset(blkif, 0, sizeof(*blkif));
-    blkif->domid  = domid;
-    blkif->handle = handle;
-    blkif->status = DISCONNECTED;
-    spin_lock_init(&blkif->vbd_lock);
-    spin_lock_init(&blkif->blk_ring_lock);
-    atomic_set(&blkif->refcnt, 0);
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( *pblkif != NULL )
-    {
-        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
-        {
-            DPRINTK("Could not create blkif: already exists\n");
-            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
-            kmem_cache_free(blkif_cachep, blkif);
-            return;
-        }
+       BUG_ON(!*pblkif);
         pblkif = &(*pblkif)->hash_next;
     }
-
-    blkif->hash_next = *pblkif;
-    *pblkif = blkif;
-
-    DPRINTK("Successfully created blkif\n");
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
-    domid_t       domid  = destroy->domid;
-    unsigned int  handle = destroy->blkif_handle;
-    blkif_t     **pblkif, *blkif;
-
-    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
-    while ( (blkif = *pblkif) != NULL )
-    {
-        if ( (blkif->domid == domid) && (blkif->handle == handle) )
-        {
-            if ( blkif->status != DISCONNECTED )
-                goto still_connected;
-            goto destroy;
-        }
-        pblkif = &blkif->hash_next;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-    return;
-
- still_connected:
-    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-    return;
-
- destroy:
     *pblkif = blkif->hash_next;
     destroy_all_vbds(blkif);
     kmem_cache_free(blkif_cachep, blkif);
-    destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_connect(blkif_be_connect_t *connect)
-{
-    domid_t        domid  = connect->domid;
-    unsigned int   handle = connect->blkif_handle;
-    unsigned int   evtchn = connect->evtchn;
-    unsigned long  shmem_frame = connect->shmem_frame;
-    struct vm_struct *vma;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    int ref = connect->shmem_ref;
-#else
-    pgprot_t       prot;
-    int            error;
-#endif
-    blkif_t       *blkif;
-    blkif_sring_t *sring;
-
-    blkif = blkif_find_by_handle(domid, handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", 
-                connect->domid, connect->blkif_handle); 
-        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
-    {
-        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
-    prot = __pgprot(_KERNPG_TABLE);
-    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
-                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
-                                    prot, domid);
-    if ( error != 0 )
-    {
-        if ( error == -ENOMEM )
-            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        else if ( error == -EFAULT )
-            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-        else
-            connect->status = BLKIF_BE_STATUS_ERROR;
-        vfree(vma->addr);
-        return;
-    }
-#else
-    { /* Map: Use the Grant table reference */
-        struct gnttab_map_grant_ref op;
-        op.host_addr      = VMALLOC_VMADDR(vma->addr);
-        op.flags          = GNTMAP_host_map;
-        op.ref            = ref;
-        op.dom            = domid;
-       
-        BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-       
-        handle = op.handle;
-       
-        if (op.handle < 0) {
-            DPRINTK(" Grant table operation failure !\n");
-            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
-            vfree(vma->addr);
-            return;
-        }
-
-        blkif->shmem_ref = ref;
-        blkif->shmem_handle = handle;
-        blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
-    }
-#endif
-
-    if ( blkif->status != DISCONNECTED )
-    {
-        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
-        vfree(vma->addr);
-        return;
-    }
-    sring = (blkif_sring_t *)vma->addr;
-    SHARED_RING_INIT(sring);
-    BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
-    
-    blkif->evtchn        = evtchn;
-    blkif->shmem_frame   = shmem_frame;
-    blkif->status        = CONNECTED;
-    blkif_get(blkif);
-
-    bind_evtchn_to_irqhandler(
-        blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
-
-    connect->status = BLKIF_BE_STATUS_OKAY;
-}
-
-int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
-{
-    domid_t       domid  = disconnect->domid;
-    unsigned int  handle = disconnect->blkif_handle;
-    blkif_t      *blkif;
-
-    blkif = blkif_find_by_handle(domid, handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("blkif_disconnect attempted for non-existent blkif"
-                " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); 
-        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return 1; /* Caller will send response error message. */
-    }
-
-    if ( blkif->status == CONNECTED )
-    {
-        blkif->status = DISCONNECTING;
-        blkif->disconnect_rspid = rsp_id;
-        wmb(); /* Let other CPUs see the status change. */
-        unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
-        blkif_deschedule(blkif);
-        blkif_put(blkif);
-        return 0; /* Caller should not send response message. */
-    }
-
-    disconnect->status = BLKIF_BE_STATUS_OKAY;
-    return 1;
 }
 
 void __init blkif_interface_init(void)
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c    Fri Aug 19 18:19:28 2005
@@ -11,13 +11,16 @@
  */
 
 #include "common.h"
+#include <asm-xen/xenbus.h>
 
 struct vbd { 
-    blkif_vdev_t   vdevice;     /* what the domain refers to this vbd as */
+    blkif_vdev_t   handle;     /* what the domain refers to this vbd as */
     unsigned char  readonly;    /* Non-zero -> read-only */
     unsigned char  type;        /* VDISK_xxx */
     blkif_pdev_t   pdevice;     /* phys device that this vbd maps to */
     struct block_device *bdev;
+
+    int active;
     rb_node_t      rb;          /* for linking into R-B tree lookup struct */
 }; 
 
@@ -33,140 +36,128 @@
 #define bdev_hardsect_size(_b) 512
 #endif
 
-void vbd_create(blkif_be_vbd_create_t *create) 
+unsigned long vbd_size(struct vbd *vbd)
+{
+       return vbd_sz(vbd);
+}
+
+unsigned int vbd_info(struct vbd *vbd)
+{
+       return vbd->type | (vbd->readonly?VDISK_READONLY:0);
+}
+
+unsigned long vbd_secsize(struct vbd *vbd)
+{
+       return bdev_hardsect_size(vbd->bdev);
+}
+
+int vbd_is_active(struct vbd *vbd)
+{
+       return vbd->active;
+}
+
+struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle,
+                      blkif_pdev_t pdevice, int readonly)
 {
     struct vbd  *vbd; 
+
+    if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
+    {
+        DPRINTK("vbd_create: out of memory\n");
+       return ERR_PTR(-ENOMEM);
+    }
+
+    vbd->handle   = handle; 
+    vbd->readonly = readonly;
+    vbd->type     = 0;
+    vbd->active   = 0;
+
+    vbd->pdevice  = pdevice;
+
+    /* FIXME: Who frees vbd on failure? --RR */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+    vbd->bdev = open_by_devnum(
+        vbd_map_devnum(vbd->pdevice),
+        vbd->readonly ? FMODE_READ : FMODE_WRITE);
+    if ( IS_ERR(vbd->bdev) )
+    {
+        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+        return ERR_PTR(-ENOENT);
+    }
+
+    if ( (vbd->bdev->bd_disk == NULL) )
+    {
+        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+        bdev_put(vbd->bdev);
+        return ERR_PTR(-ENOENT);
+    }
+
+    if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
+        vbd->type |= VDISK_CDROM;
+    if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
+        vbd->type |= VDISK_REMOVABLE;
+
+#else
+    if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
+    {
+        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+        return ERR_PTR(-ENOENT);
+    }
+#endif
+
+    DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+            handle, blkif->domid);
+    return vbd;
+}
+
+void vbd_activate(blkif_t *blkif, struct vbd *vbd)
+{
     rb_node_t  **rb_p, *rb_parent = NULL;
-    blkif_t     *blkif;
-    blkif_vdev_t vdevice = create->vdevice;
-
-    blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n", 
-                create->domid, create->blkif_handle); 
-        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
+    struct vbd *i;
+    BUG_ON(vbd_is_active(vbd));
+
+    /* Find where to put it. */
     rb_p = &blkif->vbd_rb.rb_node;
     while ( *rb_p != NULL )
     {
         rb_parent = *rb_p;
-        vbd = rb_entry(rb_parent, struct vbd, rb);
-        if ( vdevice < vbd->vdevice )
+        i = rb_entry(rb_parent, struct vbd, rb);
+        if ( vbd->handle < i->handle )
         {
             rb_p = &rb_parent->rb_left;
         }
-        else if ( vdevice > vbd->vdevice )
+        else if ( vbd->handle > i->handle )
         {
             rb_p = &rb_parent->rb_right;
         }
         else
         {
-            DPRINTK("vbd_create attempted for already existing vbd\n");
-            create->status = BLKIF_BE_STATUS_VBD_EXISTS;
-            return;
+           /* We never create two of same vbd, so not possible. */
+           BUG();
         }
     }
 
-    if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
-    {
-        DPRINTK("vbd_create: out of memory\n");
-        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
-        return;
-    }
-
-    vbd->vdevice  = vdevice; 
-    vbd->readonly = create->readonly;
-    vbd->type     = 0;
-
-    /* Mask to 16-bit for compatibility with old tools */
-    vbd->pdevice  = create->pdevice & 0xffff;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    vbd->bdev = open_by_devnum(
-        vbd_map_devnum(vbd->pdevice),
-        vbd->readonly ? FMODE_READ : FMODE_WRITE);
-    if ( IS_ERR(vbd->bdev) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
-        return;
-    }
-
-    if ( (vbd->bdev->bd_disk == NULL) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
-        bdev_put(vbd->bdev);
-        return;
-    }
-
-    if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
-        vbd->type |= VDISK_CDROM;
-    if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
-        vbd->type |= VDISK_REMOVABLE;
-
-#else
-    if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
-    {
-        DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
-        create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
-        return;
-    }
-#endif
+    /* Now we're active. */
+    vbd->active = 1;
+    blkif_get(blkif);
 
     spin_lock(&blkif->vbd_lock);
     rb_link_node(&vbd->rb, rb_parent, rb_p);
     rb_insert_color(&vbd->rb, &blkif->vbd_rb);
     spin_unlock(&blkif->vbd_lock);
-
-    DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
-            vdevice, create->domid);
-    create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 
-{
-    blkif_t           *blkif;
-    struct vbd        *vbd;
-    rb_node_t         *rb;
-    blkif_vdev_t       vdevice = destroy->vdevice;
-
-    blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
-    if ( unlikely(blkif == NULL) )
-    {
-        DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n", 
-                destroy->domid, destroy->blkif_handle); 
-        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
-        return;
-    }
-
-    rb = blkif->vbd_rb.rb_node;
-    while ( rb != NULL )
-    {
-        vbd = rb_entry(rb, struct vbd, rb);
-        if ( vdevice < vbd->vdevice )
-            rb = rb->rb_left;
-        else if ( vdevice > vbd->vdevice )
-            rb = rb->rb_right;
-        else
-            goto found;
-    }
-
-    destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
-    return;
-
- found:
-    spin_lock(&blkif->vbd_lock);
-    rb_erase(rb, &blkif->vbd_rb);
-    spin_unlock(&blkif->vbd_lock);
+}
+
+void vbd_free(blkif_t *blkif, struct vbd *vbd)
+{
+    if (vbd_is_active(vbd)) {
+       spin_lock(&blkif->vbd_lock);
+       rb_erase(&vbd->rb, &blkif->vbd_rb);
+       spin_unlock(&blkif->vbd_lock);
+       blkif_put(blkif);
+    }
     bdev_put(vbd->bdev);
     kfree(vbd);
 }
-
 
 void destroy_all_vbds(blkif_t *blkif)
 {
@@ -183,73 +174,11 @@
         bdev_put(vbd->bdev);
         kfree(vbd);
         spin_lock(&blkif->vbd_lock);
+        blkif_put(blkif);
     }
 
     spin_unlock(&blkif->vbd_lock);
 }
-
-
-static void vbd_probe_single(
-    blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd)
-{
-    vbd_info->device      = vbd->vdevice; 
-    vbd_info->info        = vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
-    vbd_info->capacity    = vbd_sz(vbd);
-    vbd_info->sector_size = bdev_hardsect_size(vbd->bdev);
-}
-
-
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
-{
-    int        rc = 0, nr_vbds = 0;
-    rb_node_t *rb;
-
-    spin_lock(&blkif->vbd_lock);
-
-    if ( (rb = blkif->vbd_rb.rb_node) == NULL )
-        goto out;
-
- new_subtree:
-    /* STEP 1. Find least node (it'll be left-most). */
-    while ( rb->rb_left != NULL )
-        rb = rb->rb_left;
-
-    for ( ; ; )
-    {
-        /* STEP 2. Dealt with left subtree. Now process current node. */
-        vbd_probe_single(blkif, &vbd_info[nr_vbds],
-                         rb_entry(rb, struct vbd, rb));
-        if ( ++nr_vbds == max_vbds )
-            goto out;
-
-        /* STEP 3. Process right subtree, if any. */
-        if ( rb->rb_right != NULL )
-        {
-            rb = rb->rb_right;
-            goto new_subtree;
-        }
-
-        /* STEP 4. Done both subtrees. Head back through ancesstors. */
-        for ( ; ; ) 
-        {
-            /* We're done when we get back to the root node. */
-            if ( rb->rb_parent == NULL )
-                goto out;
-            /* If we are left of parent, then parent is next to process. */
-            if ( rb->rb_parent->rb_left == rb )
-                break;
-            /* If we are right of parent, then we climb to grandparent. */
-            rb = rb->rb_parent;
-        }
-
-        rb = rb->rb_parent;
-    }
-
- out:
-    spin_unlock(&blkif->vbd_lock);
-    return (rc == 0) ? nr_vbds : rc;  
-}
-
 
 int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
 {
@@ -264,9 +193,9 @@
     while ( rb != NULL )
     {
         vbd = rb_entry(rb, struct vbd, rb);
-        if ( req->dev < vbd->vdevice )
+        if ( req->dev < vbd->handle )
             rb = rb->rb_left;
-        else if ( req->dev > vbd->vdevice )
+        else if ( req->dev > vbd->handle )
             rb = rb->rb_right;
         else
             goto found;
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c      Fri Aug 19 18:19:28 2005
@@ -53,8 +53,8 @@
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <scsi/scsi.h>
-#include <asm-xen/ctrl_if.h>
 #include <asm-xen/evtchn.h>
+#include <asm-xen/xenbus.h>
 #ifdef CONFIG_XEN_BLKDEV_GRANT
 #include <asm-xen/xen-public/grant_table.h>
 #include <asm-xen/gnttab.h>
@@ -65,22 +65,14 @@
 /* Control whether runtime update of vbds is enabled. */
 #define ENABLE_VBD_UPDATE 1
 
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#else
-static void vbd_update(void){};
-#endif
-
 #define BLKIF_STATE_CLOSED       0
 #define BLKIF_STATE_DISCONNECTED 1
 #define BLKIF_STATE_CONNECTED    2
 
-static int blkif_handle = 0;
 static unsigned int blkif_state = BLKIF_STATE_CLOSED;
 static unsigned int blkif_evtchn = 0;
-
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
+static unsigned int blkif_vbds = 0;
+static unsigned int blkif_vbds_connected = 0;
 
 static blkif_front_ring_t blk_ring;
 
@@ -92,6 +84,7 @@
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
 #define GRANTREF_INVALID (1<<15)
+static int shmem_ref;
 #endif
 
 static struct blk_shadow {
@@ -105,7 +98,7 @@
 
 static void kick_pending_request_queues(void);
 
-int __init xlblk_init(void);
+static int __init xlblk_init(void);
 
 static void blkif_completion(struct blk_shadow *s);
 
@@ -179,19 +172,6 @@
 
 module_init(xlblk_init);
 
-#if ENABLE_VBD_UPDATE
-static void update_vbds_task(void *unused)
-{ 
-    xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
-    static DECLARE_WORK(update_tq, update_vbds_task, NULL);
-    schedule_work(&update_tq);
-}
-#endif /* ENABLE_VBD_UPDATE */
-
 static struct xlbd_disk_info *head_waiting = NULL;
 static void kick_pending_request_queues(void)
 {
@@ -221,16 +201,7 @@
 
 int blkif_release(struct inode *inode, struct file *filep)
 {
-    struct gendisk *gd = inode->i_bdev->bd_disk;
-    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
-    /*
-     * When usage drops to zero it may allow more VBD updates to occur.
-     * Update of usage count is protected by a per-device semaphore.
-     */
-    if ( --di->mi->usage == 0 )
-        vbd_update();
-
+    /* FIXME: This is where we can actually free up majors, etc. --RR */
     return 0;
 }
 
@@ -301,7 +272,7 @@
     ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
         BLKIF_OP_READ;
     ring_req->sector_number = (blkif_sector_t)req->sector;
-    ring_req->device = di->xd_device;
+    ring_req->handle = di->handle;
 
     ring_req->nr_segments = 0;
     rq_for_each_bio(bio, req)
@@ -446,10 +417,6 @@
             end_that_request_last(req);
 
             break;
-        case BLKIF_OP_PROBE:
-            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
-            blkif_control_rsp_valid = 1;
-            break;
         default:
             BUG();
         }
@@ -483,28 +450,6 @@
 #define blkif_io_lock io_request_lock
 
 /*============================================================================*/
-#if ENABLE_VBD_UPDATE
-
-/*
- * blkif_update_int/update-vbds_task - handle VBD update events.
- *  Schedule a task for keventd to run, which will update the VBDs and perform 
- *  the corresponding updates to our view of VBD state.
- */
-static void update_vbds_task(void *unused)
-{ 
-    xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
-    static struct tq_struct update_tq;
-    update_tq.routine = update_vbds_task;
-    schedule_task(&update_tq);
-}
-
-#endif /* ENABLE_VBD_UPDATE */
-/*============================================================================*/
-
 static void kick_pending_request_queues(void)
 {
     /* We kick pending request queues if the ring is reasonably empty. */
@@ -757,7 +702,8 @@
                                char *          buffer,
                                unsigned long   sector_number,
                                unsigned short  nr_sectors,
-                               kdev_t          device)
+                               kdev_t          device,
+                              blkif_vdev_t    handle)
 {
     unsigned long       buffer_ma = virt_to_bus(buffer);
     unsigned long       xid;
@@ -871,7 +817,7 @@
     req->id            = xid;
     req->operation     = operation;
     req->sector_number = (blkif_sector_t)sector_number;
-    req->device        = device; 
+    req->handle        = handle; 
     req->nr_segments   = 1;
 #ifdef CONFIG_XEN_BLKDEV_GRANT
     /* install a grant reference. */
@@ -1047,108 +993,10 @@
 
 /*****************************  COMMON CODE  *******************************/
 
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
-                              unsigned long address)
-{
-    int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
-    ASSERT( ref != -ENOSPC );
-
-    gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
-
-    req->frame_and_sects[0] = blkif_fas_from_gref(ref, 0, (PAGE_SIZE/512)-1);
-
-    blkif_control_send(req, rsp);
-}
-#endif
-
-void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
-    unsigned long flags, id;
-    blkif_request_t *req_d;
-
- retry:
-    while ( RING_FULL(&blk_ring) )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-    }
-
-    spin_lock_irqsave(&blkif_io_lock, flags);
-    if ( RING_FULL(&blk_ring) )
-    {
-        spin_unlock_irqrestore(&blkif_io_lock, flags);
-        goto retry;
-    }
-
-    DISABLE_SCATTERGATHER();
-    req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
-    *req_d = *req;    
-
-    id = GET_ID_FROM_FREELIST();
-    req_d->id = id;
-    blk_shadow[id].request = (unsigned long)req;
-
-    pickle_request(&blk_shadow[id], req);
-
-    blk_ring.req_prod_pvt++;
-    flush_requests();
-
-    spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-    while ( !blkif_control_rsp_valid )
-    {
-        set_current_state(TASK_INTERRUPTIBLE);
-        schedule_timeout(1);
-    }
-
-    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
-    blkif_control_rsp_valid = 0;
-}
-
-
-/* Send a driver status notification to the domain controller. */
-static void send_driver_status(int ok)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_BLKIF_FE,
-        .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
-        .length  = sizeof(blkif_fe_driver_status_t),
-    };
-    blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
-    
-    msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
-
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Tell the controller to bring up the interface. */
-static void blkif_send_interface_connect(void)
-{
-    ctrl_msg_t cmsg = {
-        .type    = CMSG_BLKIF_FE,
-        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
-        .length  = sizeof(blkif_fe_interface_connect_t),
-    };
-    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-    
-    msg->handle      = 0;
-    msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
-    
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    msg->shmem_ref   = gnttab_claim_grant_reference( &gref_head, gref_terminal );
-    ASSERT( msg->shmem_ref != -ENOSPC );
-    gnttab_grant_foreign_access_ref ( msg->shmem_ref , rdomid, msg->shmem_frame, 0 );
-#endif
-
-    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
 static void blkif_free(void)
 {
     /* Prevent new requests being issued until we fix things up. */
     spin_lock_irq(&blkif_io_lock);
-    recovery = 1;
     blkif_state = BLKIF_STATE_DISCONNECTED;
     spin_unlock_irq(&blkif_io_lock);
 
@@ -1160,31 +1008,6 @@
     }
     unbind_evtchn_from_irqhandler(blkif_evtchn, NULL);
     blkif_evtchn = 0;
-}
-
-static void blkif_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_disconnect(void)
-{
-    blkif_sring_t *sring;
-    
-    if ( blk_ring.sring != NULL )
-        free_page((unsigned long)blk_ring.sring);
-    
-    sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
-    SHARED_RING_INIT(sring);
-    FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
-    blkif_state  = BLKIF_STATE_DISCONNECTED;
-    blkif_send_interface_connect();
-}
-
-static void blkif_reset(void)
-{
-    blkif_free();
-    blkif_disconnect();
 }
 
 static void blkif_recover(void)
@@ -1257,11 +1080,14 @@
     blkif_state = BLKIF_STATE_CONNECTED;
 }
 
-static void blkif_connect(blkif_fe_interface_status_t *status)
+static void blkif_connect(u16 evtchn, domid_t domid)
 {
     int err = 0;
 
-    blkif_evtchn = status->evtchn;
+    blkif_evtchn = evtchn;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    rdomid       = domid;
+#endif
 
     err = bind_evtchn_to_irqhandler(
         blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
@@ -1270,142 +1096,318 @@
         WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
         return;
     }
-
-    if ( recovery ) 
-    {
-        blkif_recover();
-    } 
-    else 
-    {
-        /* Transition to connected in case we need to do 
-         *  a partition probe on a whole disk. */
-        blkif_state = BLKIF_STATE_CONNECTED;
-        
-        /* Probe for discs attached to the interface. */
-        xlvbd_init();
-    }
-    
-    /* Kick pending requests. */
-    spin_lock_irq(&blkif_io_lock);
-    kick_pending_request_queues();
-    spin_unlock_irq(&blkif_io_lock);
-}
-
-static void unexpected(blkif_fe_interface_status_t *status)
-{
-    DPRINTK(" Unexpected blkif status %u in state %u\n", 
-            status->status, blkif_state);
-}
-
-static void blkif_status(blkif_fe_interface_status_t *status)
-{
+}
+
+
+static struct xenbus_device_id blkfront_ids[] = {
+       { "vbd" },
+       { "" }
+};
+
+struct blkfront_info
+{
+       /* We watch the backend */
+       struct xenbus_watch watch;
+       int vdevice;
+       u16 handle;
+       int connected;
+       struct xenbus_device *dev;
+       char *backend;
+};
+
+static void watch_for_status(struct xenbus_watch *watch, const char *node)
+{
+       struct blkfront_info *info;
+       unsigned int binfo;
+       unsigned long sectors, sector_size;
+       int err;
+
+       info = container_of(watch, struct blkfront_info, watch);
+       node += strlen(watch->node);
+
+       /* FIXME: clean up when error on the other end. */
+       if (info->connected)
+               return;
+
+       err = xenbus_gather(watch->node, 
+                           "sectors", "%lu", &sectors,
+                           "info", "%u", &binfo,
+                           "sector-size", "%lu", &sector_size,
+                           NULL);
+
+       if (err)
+               xenbus_dev_error(info->dev, err, "reading backend fields");
+       else {
+               xlvbd_add(sectors, info->vdevice, info->handle, binfo,
+                         sector_size);
+               info->connected = 1;
+
+               /* First to connect?  blkif is now connected. */
+               if (blkif_vbds_connected++ == 0)
+                       blkif_state = BLKIF_STATE_CONNECTED;
+
+               xenbus_dev_ok(info->dev);
+
+               /* Kick pending requests. */
+               spin_lock_irq(&blkif_io_lock);
+               kick_pending_request_queues();
+               spin_unlock_irq(&blkif_io_lock);
+       }
+}
+
+static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id)
+{
+       blkif_sring_t *sring;
+       evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
+       int err;
+
+       sring = (void *)__get_free_page(GFP_KERNEL);
+       if (!sring) {
+               xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
+               return -ENOMEM;
+       }
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
+
 #ifdef CONFIG_XEN_BLKDEV_GRANT
-    rdomid       = status->domid; /* need to set rdomid early */
-#endif
-
-    if ( status->handle != blkif_handle )
-    {
-        WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
-        unexpected(status);
-        return;
-    }
-
-    switch ( status->status ) 
-    {
-    case BLKIF_INTERFACE_STATUS_CLOSED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_close();
-            break;
-        }
-        break;
-
-    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            blkif_disconnect();
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-        case BLKIF_STATE_CONNECTED:
-            /* unexpected(status); */ /* occurs during suspend/resume */
-            blkif_reset();
-            break;
-        }
-        break;
-
-    case BLKIF_INTERFACE_STATUS_CONNECTED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-            unexpected(status);
-            blkif_disconnect();
-            blkif_connect(status);
-            break;
-        case BLKIF_STATE_DISCONNECTED:
-            blkif_connect(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            unexpected(status);
-            blkif_connect(status);
-            break;
-        }
-        break;
-
-    case BLKIF_INTERFACE_STATUS_CHANGED:
-        switch ( blkif_state )
-        {
-        case BLKIF_STATE_CLOSED:
-        case BLKIF_STATE_DISCONNECTED:
-            unexpected(status);
-            break;
-        case BLKIF_STATE_CONNECTED:
-            vbd_update();
-            break;
-        }
-        break;
-
-    default:
-        WPRINTK(" Invalid blkif status: %d\n", status->status);
-        break;
-    }
-}
-
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
-    switch ( msg->subtype )
-    {
-    case CMSG_BLKIF_FE_INTERFACE_STATUS:
-        blkif_status((blkif_fe_interface_status_t *)
-                     &msg->msg[0]);
-        break;
-    default:
-        msg->length = 0;
-        break;
-    }
-
-    ctrl_if_send_response(msg);
-}
-
-int wait_for_blkif(void)
+       shmem_ref = gnttab_claim_grant_reference(&gref_head,
+                                                gref_terminal);
+       ASSERT(shmem_ref != -ENOSPC);
+       gnttab_grant_foreign_access_ref(shmem_ref,
+                                       backend_id,
+                                       virt_to_mfn(blk_ring.sring),
+                                       0);
+#endif
+
+       op.u.alloc_unbound.dom = backend_id;
+       err = HYPERVISOR_event_channel_op(&op);
+       if (err) {
+               free_page((unsigned long)blk_ring.sring);
+               blk_ring.sring = 0;
+               xenbus_dev_error(dev, err, "allocating event channel");
+               return err;
+       }
+       blkif_connect(op.u.alloc_unbound.port, backend_id);
+       return 0;
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+                          struct blkfront_info *info)
+{
+       char *backend;
+       const char *message;
+       int err, backend_id;
+
+       backend = xenbus_read(dev->nodename, "backend", NULL);
+       if (IS_ERR(backend)) {
+               err = PTR_ERR(backend);
+               if (err == -ENOENT)
+                       goto out;
+               xenbus_dev_error(dev, err, "reading %s/backend",
+                                dev->nodename);
+               goto out;
+       }
+       if (strlen(backend) == 0) {
+               err = -ENOENT;
+               goto free_backend;
+       }
+
+       /* FIXME: This driver can't handle backends on different
+        * domains.  Check and fail gracefully. */
+       err = xenbus_scanf(dev->nodename, "backend-id", "%i", &backend_id);
+       if (err == -ENOENT)
+               goto free_backend;
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading %s/backend-id",
+                                dev->nodename);
+               goto free_backend;
+       }
+
+       /* First device?  We create shared ring, alloc event channel. */
+       if (blkif_vbds == 0) {
+               err = setup_blkring(dev, backend_id);
+               if (err)
+                       goto free_backend;
+       }
+
+       err = xenbus_transaction_start(dev->nodename);
+       if (err) {
+               xenbus_dev_error(dev, err, "starting transaction");
+               goto destroy_blkring;
+       }
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+       err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref);
+       if (err) {
+               message = "writing grant-id";
+               goto abort_transaction;
+       }
+#else
+       err = xenbus_printf(dev->nodename, "shared-frame", "%lu",
+                           virt_to_mfn(blk_ring.sring));
+       if (err) {
+               message = "writing shared-frame";
+               goto abort_transaction;
+       }
+#endif
+       err = xenbus_printf(dev->nodename,
+                           "event-channel", "%u", blkif_evtchn);
+       if (err) {
+               message = "writing event-channel";
+               goto abort_transaction;
+       }
+
+       info->watch.node = info->backend = backend;
+       info->watch.callback = watch_for_status;
+
+       err = register_xenbus_watch(&info->watch);
+       if (err) {
+               message = "registering watch on backend";
+               goto abort_transaction;
+       }
+
+       err = xenbus_transaction_end(0);
+       if (err) {
+               xenbus_dev_error(dev, err, "completing transaction");
+               goto destroy_blkring;
+       }
+       return 0;
+
+abort_transaction:
+       xenbus_transaction_end(1);
+       /* Have to do this *outside* transaction.  */
+       xenbus_dev_error(dev, err, "%s", message);
+destroy_blkring:
+       if (blkif_vbds == 0)
+               blkif_free();
+free_backend:
+       kfree(backend);
+out:
+       printk("%s:%u = %i\n", __FILE__, __LINE__, err);
+       return err;
+}
+
+/* Setup supplies the backend dir, virtual device.
+
+   We place an event channel and shared frame entries.
+   We watch backend to wait if it's ok. */
+static int blkfront_probe(struct xenbus_device *dev,
+                         const struct xenbus_device_id *id)
+{
+       int err;
+       struct blkfront_info *info;
+       int vdevice;
+
+       /* FIXME: Use dynamic device id if this is not set. */
+       err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
+       if (err == -ENOENT)
+               return err;
+       if (err < 0) {
+               xenbus_dev_error(dev, err, "reading virtual-device");
+               return err;
+       }
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               xenbus_dev_error(dev, err, "allocating info structure");
+               return err;
+       }
+       info->dev = dev;
+       info->vdevice = vdevice;
+       info->connected = 0;
+       /* Front end dir is a number, which is used as the id. */
+       info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+       dev->data = info;
+
+       err = talk_to_backend(dev, info);
+       if (err) {
+               kfree(info);
+               return err;
+       }
+
+       /* Call once in case entries already there. */
+       watch_for_status(&info->watch, info->watch.node);
+       blkif_vbds++;
+       return 0;
+}
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->data;
+
+       if (info->backend)
+               unregister_xenbus_watch(&info->watch);
+
+       if (info->connected) {
+               xlvbd_del(info->handle);
+               blkif_vbds_connected--;
+       }
+       kfree(info->backend);
+       kfree(info);
+
+       if (--blkif_vbds == 0)
+               blkif_free();
+
+       return 0;
+}
+
+static int blkfront_suspend(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->data;
+
+       unregister_xenbus_watch(&info->watch);
+       kfree(info->backend);
+       info->backend = NULL;
+
+       if (--blkif_vbds == 0) {
+               recovery = 1;
+               blkif_free();
+       }
+
+       return 0;
+}
+
+static int blkfront_resume(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->data;
+       int err;
+
+       /* FIXME: Check geometry hasn't changed here... */
+       err = talk_to_backend(dev, info);
+       if (!err) {
+               if (blkif_vbds++ == 0)
+                       blkif_recover();
+       }
+       return err;
+}
+
+static struct xenbus_driver blkfront = {
+       .name = "vbd",
+       .owner = THIS_MODULE,
+       .ids = blkfront_ids,
+       .probe = blkfront_probe,
+       .remove = blkfront_remove,
+       .resume = blkfront_resume,
+       .suspend = blkfront_suspend,
+};
+
+static void __init init_blk_xenbus(void)
+{
+       xenbus_register_device(&blkfront);
+}
+
+static int wait_for_blkif(void)
 {
     int err = 0;
     int i;
-    send_driver_status(1);
 
     /*
      * We should read 'nr_interfaces' from response message and wait
      * for notifications before proceeding. For now we assume that we
      * will be notified of exactly one interface.
      */
-    for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+    for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ )
     {
         set_current_state(TASK_INTERRUPTIBLE);
         schedule_timeout(1);
@@ -1419,7 +1421,7 @@
     return err;
 }
 
-int __init xlblk_init(void)
+static int __init xlblk_init(void)
 {
     int i;
 
@@ -1443,27 +1445,11 @@
         blk_shadow[i].req.id = i+1;
     blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
-    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
-                                    CALLBACK_IN_BLOCKING_CONTEXT);
+    init_blk_xenbus();
 
     wait_for_blkif();
 
     return 0;
-}
-
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    int i, j;
-    for ( i = 0; i < BLK_RING_SIZE; i++ )
-        for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
-            blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
-#endif
-    send_driver_status(1);
 }
 
 static void blkif_completion(struct blk_shadow *s)
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 19 18:19:28 2005
@@ -100,6 +100,7 @@
 
 struct xlbd_disk_info {
     int xd_device;
+    blkif_vdev_t handle;
     struct xlbd_major_info *mi;
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
     struct xlbd_disk_info  *next_waiting;
@@ -119,17 +120,10 @@
                        unsigned command, unsigned long argument);
 extern int blkif_check(dev_t dev);
 extern int blkif_revalidate(dev_t dev);
-extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-extern void blkif_control_probe_send(
-    blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
-#endif
 extern void do_blkif_request (request_queue_t *rq); 
 
-extern void xlvbd_update_vbds(void);
-
 /* Virtual block-device subsystem. */
-extern int  xlvbd_init(void);
-extern void xlvbd_cleanup(void); 
-
+int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
+             u16 info, u16 sector_size);
+void xlvbd_del(blkif_vdev_t handle);
 #endif /* __XEN_DRIVERS_BLOCK_H__ */
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c   Fri Aug 19 18:19:28 2005
@@ -46,8 +46,9 @@
 struct lvdisk
 {
     blkif_sector_t capacity; /*  0: Size in terms of 512-byte sectors.   */
-    blkif_vdev_t   device;   /*  8: Device number (opaque 16 bit value). */
-    u16            info; 
+    blkif_vdev_t   handle;   /*  8: Device number (opaque 16 bit value). */
+    u16            info;
+    dev_t          dev;
     struct list_head list;
 };
 
@@ -85,7 +86,7 @@
 
 /* Information about our VBDs. */
 #define MAX_VBDS 64
-struct list_head vbds_list;
+static LIST_HEAD(vbds_list);
 
 #define MAJOR_XEN(dev) ((dev)>>8)
 #define MINOR_XEN(dev) ((dev) & 0xff)
@@ -116,49 +117,6 @@
 {
     list_del(&disk->list);
     kfree(disk);
-}
-
-static vdisk_t *xlvbd_probe(int *ret)
-{
-    blkif_response_t rsp;
-    blkif_request_t req;
-    vdisk_t *disk_info = NULL;
-    unsigned long buf;
-    int nr;
-
-    buf = __get_free_page(GFP_KERNEL);
-    if ((void *)buf == NULL)
-        goto out;
-
-    memset(&req, 0, sizeof(req));
-    req.operation = BLKIF_OP_PROBE;
-    req.nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-    blkif_control_probe_send(&req, &rsp,
-                             (unsigned long)(virt_to_machine(buf)));
-#else
-    req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0, (PAGE_SIZE/512)-1);
-
-    blkif_control_send(&req, &rsp);
-#endif
-    if ( rsp.status <= 0 ) {
-        WPRINTK("Could not probe disks (%d)\n", rsp.status);
-        goto out;
-    }
-    nr = rsp.status;
-    if ( nr > MAX_VBDS )
-        nr = MAX_VBDS;
-
-    disk_info = kmalloc(nr * sizeof(vdisk_t), GFP_KERNEL);
-    if (disk_info != NULL)
-        memcpy(disk_info, (void *) buf, nr * sizeof(vdisk_t));
-
-    if (ret != NULL)
-        *ret = nr;
-
-out:
-    free_page(buf);
-    return disk_info;
 }
 
 static struct xlbd_major_info *xlbd_alloc_major_info(
@@ -189,6 +147,7 @@
         break;
     }
     
+    printk("Registering block device major %i\n", ptr->major);
     if (register_blkdev(ptr->major, ptr->type->devname)) {
         WPRINTK("can't get major %d with name %s\n",
                 ptr->major, ptr->type->devname);
@@ -231,7 +190,7 @@
             xlbd_alloc_major_info(major, minor, index));
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 {
     request_queue_t *rq;
 
@@ -242,7 +201,7 @@
     elevator_init(rq, "noop");
 
     /* Hard sector size and max sectors impersonate the equiv. hardware. */
-    blk_queue_hardsect_size(rq, disk->sector_size);
+    blk_queue_hardsect_size(rq, sector_size);
     blk_queue_max_sectors(rq, 512);
 
     /* Each segment in a request is up to an aligned page in size. */
@@ -261,8 +220,9 @@
     return 0;
 }
 
-struct gendisk *xlvbd_alloc_gendisk(
-    struct xlbd_major_info *mi, int minor, vdisk_t *disk)
+static struct gendisk *xlvbd_alloc_gendisk(
+    struct xlbd_major_info *mi, int minor, blkif_sector_t capacity,
+    int device, blkif_vdev_t handle, u16 info, u16 sector_size)
 {
     struct gendisk *gd;
     struct xlbd_disk_info *di;
@@ -273,7 +233,8 @@
         return NULL;
     memset(di, 0, sizeof(*di));
     di->mi = mi;
-    di->xd_device = disk->device;
+    di->xd_device = device;
+    di->handle = handle;
 
     if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
         nr_minors = 1 << mi->type->partn_shift;
@@ -296,22 +257,22 @@
     gd->first_minor = minor;
     gd->fops = &xlvbd_block_fops;
     gd->private_data = di;
-    set_capacity(gd, disk->capacity);
-
-    if (xlvbd_init_blk_queue(gd, disk)) {
+    set_capacity(gd, capacity);
+
+    if (xlvbd_init_blk_queue(gd, sector_size)) {
         del_gendisk(gd);
         goto out;
     }
 
     di->rq = gd->queue;
 
-    if (disk->info & VDISK_READONLY)
+    if (info & VDISK_READONLY)
         set_disk_ro(gd, 1);
 
-    if (disk->info & VDISK_REMOVABLE)
+    if (info & VDISK_REMOVABLE)
         gd->flags |= GENHD_FL_REMOVABLE;
 
-    if (disk->info & VDISK_CDROM)
+    if (info & VDISK_CDROM)
         gd->flags |= GENHD_FL_CD;
 
     add_disk(gd);
@@ -323,38 +284,36 @@
     return NULL;
 }
 
-static int xlvbd_device_add(struct list_head *list, vdisk_t *disk)
+int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
+             u16 info, u16 sector_size)
 {
     struct lvdisk *new;
-    int minor;
-    dev_t device;
     struct block_device *bd;
     struct gendisk *gd;
     struct xlbd_major_info *mi;
 
-    mi = xlbd_get_major_info(disk->device);
+    mi = xlbd_get_major_info(device);
     if (mi == NULL)
         return -EPERM;
 
     new = xlvbd_device_alloc();
     if (new == NULL)
-        return -1;
-    new->capacity = disk->capacity;
-    new->device = disk->device;
-    new->info = disk->info;
-    
-    minor = MINOR_XEN(disk->device);
-    device = MKDEV(mi->major, minor);
-    
-    bd = bdget(device);
+        return -ENOMEM;
+    new->capacity = capacity;
+    new->info = info;
+    new->handle = handle;
+    new->dev = MKDEV(MAJOR_XEN(device), MINOR_XEN(device));
+
+    bd = bdget(new->dev);
     if (bd == NULL)
         goto out;
     
-    gd = xlvbd_alloc_gendisk(mi, minor, disk);
+    gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle,
+                            info, sector_size);
     if (gd == NULL)
         goto out_bd;
 
-    list_add(&new->list, list);
+    list_add(&new->list, &vbds_list);
 out_bd:
     bdput(bd);
 out:
@@ -363,27 +322,26 @@
 
 static int xlvbd_device_del(struct lvdisk *disk)
 {
-    dev_t device;
     struct block_device *bd;
     struct gendisk *gd;
     struct xlbd_disk_info *di;
     int ret = 0, unused;
     request_queue_t *rq;
 
-    device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device));
-
-    bd = bdget(device);
+    bd = bdget(disk->dev);
     if (bd == NULL)
         return -1;
 
-    gd = get_gendisk(device, &unused);
+    gd = get_gendisk(disk->dev, &unused);
     di = gd->private_data;
 
+#if 0 /* This is wrong: hda and hdb share same major, for example. */
     if (di->mi->usage != 0) {
-        WPRINTK("disk removal failed: used [dev=%x]\n", device);
+        WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev);
         ret = -1;
         goto out;
     }
+#endif
 
     rq = gd->queue;
     del_gendisk(gd);
@@ -391,110 +349,19 @@
     blk_cleanup_queue(rq);
 
     xlvbd_device_free(disk);
-out:
     bdput(bd);
     return ret;
 }
 
-static int xlvbd_device_update(struct lvdisk *ldisk, vdisk_t *disk)
-{
-    dev_t device;
-    struct block_device *bd;
-    struct gendisk *gd;
-    int unused;
-
-    if ((ldisk->capacity == disk->capacity) && (ldisk->info == disk->info))
-        return 0;    
-
-    device = MKDEV(MAJOR_XEN(ldisk->device), MINOR_XEN(ldisk->device));
-
-    bd = bdget(device);
-    if (bd == NULL)
-        return -1;
-
-    gd = get_gendisk(device, &unused);
-    set_capacity(gd, disk->capacity);    
-    ldisk->capacity = disk->capacity;
-
-    bdput(bd);
-
-    return 0;
-}
-
-void xlvbd_refresh(void)
-{
-    vdisk_t *newdisks;
-    struct list_head *tmp, *tmp2;
-    struct lvdisk *disk;
-    int i, nr;
-
-    newdisks = xlvbd_probe(&nr);
-    if (newdisks == NULL) {
-        WPRINTK("failed to probe\n");
-        return;
-    }
-    
-    i = 0;
-    list_for_each_safe(tmp, tmp2, &vbds_list) {
-        disk = list_entry(tmp, struct lvdisk, list);
-        
-        for (i = 0; i < nr; i++) {
-            if ( !newdisks[i].device )
-                continue;
-            if ( disk->device == newdisks[i].device ) {
-                xlvbd_device_update(disk, &newdisks[i]);
-                newdisks[i].device = 0;
-                break;
-            }
-        }
-        if (i == nr) {
-            xlvbd_device_del(disk);
-            newdisks[i].device = 0;
-        }
-    }
-    for (i = 0; i < nr; i++)
-        if ( newdisks[i].device )
-            xlvbd_device_add(&vbds_list, &newdisks[i]);
-    kfree(newdisks);
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
- * state. The VBDs need to be updated in this way when the domain is
- * initialised and also each time we receive an XLBLK_UPDATE event.
- */
-void xlvbd_update_vbds(void)
-{
-    xlvbd_refresh();
-}
-
-/*
- * Set up all the linux device goop for the virtual block devices
- * (vbd's) that we know about. Note that although from the backend
- * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
- * number, the domain creation tools conventionally allocate these
- * numbers to correspond to those used by 'real' linux -- this is just
- * for convenience as it means e.g. that the same /etc/fstab can be
- * used when booting with or without Xen.
- */
-int xlvbd_init(void)
-{
-    int i, nr;
-    vdisk_t *disks;
-
-    INIT_LIST_HEAD(&vbds_list);
-
-    memset(major_info, 0, sizeof(major_info));
-    
-    disks = xlvbd_probe(&nr);
-    if (disks == NULL) {
-        WPRINTK("failed to probe\n");
-        return -1;
-    }
-
-    for (i = 0; i < nr; i++)
-        xlvbd_device_add(&vbds_list, &disks[i]);
-
-    kfree(disks);
-    return 0;
-}
+void xlvbd_del(blkif_vdev_t handle)
+{
+       struct lvdisk *i;
+
+       list_for_each_entry(i, &vbds_list, list) {
+               if (i->handle == handle) {
+                       xlvbd_device_del(i);
+                       return;
+               }
+       }
+       BUG();
+}
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h  Fri Aug 19 18:19:28 2005
@@ -87,7 +87,7 @@
     struct work_struct work;
 #ifdef CONFIG_XEN_BLKDEV_GRANT
     u16 shmem_handle;
-    memory_t shmem_vaddr;
+    unsigned long shmem_vaddr;
     grant_ref_t shmem_ref;
 #endif
 } blkif_t;
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c       Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c       Fri Aug 19 18:19:28 2005
@@ -320,7 +320,7 @@
     };
     blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
     msg->handle      = 0;
-    msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+    msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring);
     
     ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
 }
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Aug 19 18:19:28 2005
@@ -49,13 +49,13 @@
     unsigned long    tx_shmem_frame;
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
     u16              tx_shmem_handle;
-    memory_t         tx_shmem_vaddr; 
+    unsigned long    tx_shmem_vaddr; 
     grant_ref_t      tx_shmem_ref; 
 #endif
     unsigned long    rx_shmem_frame;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
     u16              rx_shmem_handle;
-    memory_t         rx_shmem_vaddr; 
+    unsigned long    rx_shmem_vaddr; 
     grant_ref_t      rx_shmem_ref; 
 #endif
     unsigned int     evtchn;
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c        Fri Aug 19 18:19:28 2005
@@ -43,7 +43,7 @@
 static int  make_rx_response(netif_t *netif, 
                              u16      id, 
                              s8       st,
-                             memory_t addr,
+                             unsigned long addr,
                              u16      size,
                              u16      csum_valid);
 
@@ -251,7 +251,7 @@
 #else
     struct mmuext_op *mmuext;
 #endif
-    unsigned long vdata, mdata, new_mfn;
+    unsigned long vdata, old_mfn, new_mfn;
     struct sk_buff_head rxq;
     struct sk_buff *skb;
     u16 notify_list[NETIF_RX_RING_SIZE];
@@ -271,7 +271,7 @@
     {
         netif   = netdev_priv(skb->dev);
         vdata   = (unsigned long)skb->data;
-        mdata   = virt_to_machine(vdata);
+        old_mfn = virt_to_mfn(vdata);
 
         /* Memory squeeze? Back off for an arbitrary while. */
         if ( (new_mfn = alloc_mfn()) == 0 )
@@ -293,7 +293,7 @@
         mcl++;
 
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        gop->mfn = mdata >> PAGE_SHIFT;
+        gop->mfn = old_mfn;
         gop->domid = netif->domid;
         gop->handle = netif->rx->ring[
         MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
@@ -308,7 +308,7 @@
         mcl++;
 
         mmuext->cmd = MMUEXT_REASSIGN_PAGE;
-        mmuext->mfn = mdata >> PAGE_SHIFT;
+        mmuext->mfn = old_mfn;
         mmuext++;
 #endif
         mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
@@ -318,7 +318,7 @@
         __skb_queue_tail(&rxq, skb);
 
 #ifdef DEBUG_GRANT
-        dump_packet('a', mdata, vdata);
+        dump_packet('a', old_mfn, vdata);
 #endif
         /* Filled the batch queue? */
         if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
@@ -345,10 +345,8 @@
 
     mcl = rx_mcl;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-    if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
-                                           grant_rx_op, gop - grant_rx_op))) {
-        BUG();
-    }
+    BUG_ON(HYPERVISOR_grant_table_op(
+        GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
     gop = grant_rx_op;
 #else
     mmuext = rx_mmuext;
@@ -361,10 +359,9 @@
         /* Rederive the machine addresses. */
         new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        mdata = (unsigned long)skb->data & ~PAGE_MASK;
-#else
-        mdata   = ((mmuext[0].mfn << PAGE_SHIFT) |
-                   ((unsigned long)skb->data & ~PAGE_MASK));
+        old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
+#else
+        old_mfn = mmuext[0].mfn;
 #endif
         atomic_set(&(skb_shinfo(skb)->dataref), 1);
         skb_shinfo(skb)->nr_frags = 0;
@@ -379,18 +376,20 @@
         /* Check the reassignment error code. */
         status = NETIF_RSP_OKAY;
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
-        BUG_ON(gop->status != 0);
+        BUG_ON(gop->status != 0); /* XXX */
 #else
         if ( unlikely(mcl[1].result != 0) )
         {
             DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
-            free_mfn(mdata >> PAGE_SHIFT);
+            free_mfn(old_mfn);
             status = NETIF_RSP_ERROR;
         }
 #endif
         evtchn = netif->evtchn;
         id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
-        if ( make_rx_response(netif, id, status, mdata,
+        if ( make_rx_response(netif, id, status,
+                              (old_mfn << PAGE_SHIFT) | /* XXX */
+                              ((unsigned long)skb->data & ~PAGE_MASK),
                               size, skb->proto_csum_valid) &&
              (rx_notify[evtchn] == 0) )
         {
@@ -888,7 +887,7 @@
 static int make_rx_response(netif_t *netif, 
                             u16      id, 
                             s8       st,
-                            memory_t addr,
+                            unsigned long addr,
                             u16      size,
                             u16      csum_valid)
 {
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c      Fri Aug 19 18:19:28 2005
@@ -448,11 +448,10 @@
         }
         grant_rx_ref[id] = ref;
         gnttab_grant_foreign_transfer_ref(ref, rdomid,
-                                          virt_to_machine(
-                                              skb->head) >> PAGE_SHIFT);
+                                          virt_to_mfn(skb->head));
         np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
 #endif
-        rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
+        rx_pfn_array[i] = virt_to_mfn(skb->head);
 
        /* Remove this page from pseudo phys map before passing back to Xen. */
        phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] 
@@ -543,13 +542,14 @@
         printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
         BUG();
     }
-    mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+    mfn = virt_to_mfn(skb->data);
     gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
-    tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+    tx->addr = ref << PAGE_SHIFT;
     grant_tx_ref[id] = ref;
 #else
-    tx->addr = virt_to_machine(skb->data);
-#endif
+    tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+    tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
     tx->size = skb->len;
     tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
 
@@ -720,7 +720,7 @@
     while ((skb = __skb_dequeue(&rxq)) != NULL) {
 #ifdef GRANT_DEBUG
         printk(KERN_ALERT "#### rx_poll     dequeue vdata=%p mfn=%lu\n",
-               skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+               skb->data, virt_to_mfn(skb->data));
         dump_packet('d', skb->data, (unsigned long)skb->data);
 #endif
         /*
@@ -854,18 +854,23 @@
      * interface has been down.
      */
     for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
-            if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
-                struct sk_buff *skb = np->tx_skbs[i];
-                
-                tx = &np->tx->ring[requeue_idx++].req;
-                
-                tx->id   = i;
-                tx->addr = virt_to_machine(skb->data);
-                tx->size = skb->len;
-                
-                np->stats.tx_bytes += skb->len;
-                np->stats.tx_packets++;
-            }
+        if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
+            struct sk_buff *skb = np->tx_skbs[i];
+
+            tx = &np->tx->ring[requeue_idx++].req;
+
+            tx->id   = i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+            tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
+#else
+            tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+            tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
+            tx->size = skb->len;
+
+            np->stats.tx_bytes += skb->len;
+            np->stats.tx_packets++;
+        }
     }
     wmb();
     np->tx->req_prod = requeue_idx;
@@ -922,7 +927,7 @@
     netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
 
     msg->handle = np->handle;
-    msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
+    msg->tx_shmem_frame = virt_to_mfn(np->tx);
 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
     msg->tx_shmem_ref   = (u32)gnttab_claim_grant_reference(&gref_tx_head, 
                                                             gref_tx_terminal);
@@ -934,7 +939,7 @@
                                      msg->tx_shmem_frame, 0);
 #endif
 
-    msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);
+    msg->rx_shmem_frame = virt_to_mfn(np->rx);
 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
     msg->rx_shmem_ref   = (u32)gnttab_claim_grant_reference(&gref_rx_head, 
                                                             gref_rx_terminal);
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c        Fri Aug 19 18:19:28 2005
@@ -657,8 +657,8 @@
         phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
             FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
 
-        ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
-               == buffer_mach + i << PAGE_SHIFT);
+        ASSERT(virt_to_mfn(MMAP_VADDR(pending_idx, i))
+               == ((buffer_mach >> PAGE_SHIFT) + i));
     }
 
     if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c      Fri Aug 19 18:19:28 2005
@@ -195,7 +195,7 @@
         }
 
         urb_priv->schedule = schedule;
-       req->iso_schedule = virt_to_machine(schedule);
+       req->iso_schedule = virt_to_mfn(schedule) << PAGE_SHIFT;
 
         return 0;
 }
@@ -212,7 +212,7 @@
 #if DEBUG
         printk(KERN_DEBUG
                 "usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons = %d\n",
-               usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+               usbif, usbif->req_prod, virt_to_mfn(&usbif->req_prod),
                usbif->resp_prod, xhci->usb_resp_cons);
 #endif
         
@@ -232,7 +232,7 @@
         req->operation       = USBIF_OP_IO;
         req->port            = 0; /* We don't care what the port is. */
         req->id              = (unsigned long) urb->hcpriv;
-        req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+        req->transfer_buffer = virt_to_mfn(urb->transfer_buffer) << PAGE_SHIFT;
        req->devnum          = usb_pipedevice(urb->pipe);
         req->direction       = usb_pipein(urb->pipe);
        req->speed           = usb_pipeslow(urb->pipe);
@@ -280,7 +280,7 @@
        printk(KERN_DEBUG
                "queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
                "resp_cons = %d\n", usbif->req_prod,
-               virt_to_machine(&usbif->req_prod),
+               virt_to_mfn(&usbif->req_prod),
               usbif->resp_prod, xhci->usb_resp_cons);
 #endif
  
@@ -1555,7 +1555,7 @@
         cmsg.type      = CMSG_USBIF_FE;
         cmsg.subtype   = CMSG_USBIF_FE_INTERFACE_CONNECT;
         cmsg.length    = sizeof(usbif_fe_interface_connect_t);
-        up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+        up.shmem_frame = virt_to_mfn(sring);
         memcpy(cmsg.msg, &up, sizeof(up));
         
         /* Tell the controller to bring up the interface. */
@@ -1599,7 +1599,7 @@
 
        DPRINTK(KERN_INFO __FILE__
                 ": USB XHCI: SHM at %p (0x%lx), EVTCHN %d\n",
-                xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
+                xhci->usb_ring.sring, virt_to_mfn(xhci->usbif),
                 xhci->evtchn);
 
         xhci->state = USBIF_STATE_CONNECTED;
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c    Fri Aug 19 18:19:28 2005
@@ -48,13 +48,12 @@
 
 static inline struct ringbuf_head *outbuf(void)
 {
-       return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT);
+       return mfn_to_virt(xen_start_info.store_mfn);
 }
 
 static inline struct ringbuf_head *inbuf(void)
 {
-       return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT)
-               + PAGE_SIZE/2;
+       return mfn_to_virt(xen_start_info.store_mfn) + PAGE_SIZE/2;
 }
 
 static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
@@ -219,8 +218,7 @@
        }
 
        /* FIXME zero out page -- domain builder should probably do this*/
-       memset(machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT),
-              0, PAGE_SIZE);
+       memset(mfn_to_virt(xen_start_info.store_mfn), 0, PAGE_SIZE);
 
        return 0;
 }
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Fri Aug 19 18:19:28 2005
@@ -48,15 +48,7 @@
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
 {
        for (; !streq(arr->devicetype, ""); arr++) {
-               if (!streq(arr->devicetype, dev->devicetype))
-                       continue;
-
-               /* If they don't care what subtype, it's a match. */
-               if (streq(arr->subtype, ""))
-                       return arr;
-
-               /* If they care, device must have (same) subtype. */
-               if (dev->subtype && streq(arr->subtype, dev->subtype))
+               if (streq(arr->devicetype, dev->devicetype))
                        return arr;
        }
        return NULL;
@@ -72,10 +64,102 @@
        return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
 }
 
+struct xen_bus_type
+{
+       char *root;
+       unsigned int levels;
+       int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+       int (*probe)(const char *type, const char *dir);
+       struct bus_type bus;
+       struct device dev;
+};
+
+/* device/<type>/<id> => <type>-<id> */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+       nodename = strchr(nodename, '/');
+       if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) {
+               printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+               return -EINVAL;
+       }
+
+       strlcpy(bus_id, nodename + 1, BUS_ID_SIZE);
+       if (!strchr(bus_id, '/')) {
+               printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+               return -EINVAL;
+       }
+       *strchr(bus_id, '/') = '-';
+       return 0;
+}
+
 /* Bus type for frontend drivers. */
-static struct bus_type xenbus_type = {
-       .name  = "xenbus",
-       .match = xenbus_match,
+static int xenbus_probe_frontend(const char *type, const char *name);
+static struct xen_bus_type xenbus_frontend = {
+       .root = "device",
+       .levels = 2,            /* device/type/<id> */
+       .get_bus_id = frontend_bus_id,
+       .probe = xenbus_probe_frontend,
+       .bus = {
+               .name  = "xen",
+               .match = xenbus_match,
+       },
+       .dev = {
+               .bus_id = "xen",
+       },
+};
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+       int domid, err;
+       const char *devid, *type, *frontend;
+       unsigned int typelen;
+
+       type = strchr(nodename, '/');
+       if (!type)
+               return -EINVAL;
+       type++;
+       typelen = strcspn(type, "/");
+       if (!typelen || type[typelen] != '/')
+               return -EINVAL;
+
+       devid = strrchr(nodename, '/') + 1;
+
+       err = xenbus_gather(nodename, "frontend-id", "%i", &domid,
+                           "frontend", NULL, &frontend,
+                           NULL);
+       if (err)
+               return err;
+       if (strlen(frontend) == 0)
+               err = -ERANGE;
+
+       if (!err && !xenbus_exists(frontend, ""))
+               err = -ENOENT;
+
+       if (err) {
+               kfree(frontend);
+               return err;
+       }
+
+       if (snprintf(bus_id, BUS_ID_SIZE,
+                    "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+               return -ENOSPC;
+       return 0;
+}
+
+static int xenbus_probe_backend(const char *type, const char *uuid);
+static struct xen_bus_type xenbus_backend = {
+       .root = "backend",
+       .levels = 3,            /* backend/type/<frontend>/<id> */
+       .get_bus_id = backend_bus_id,
+       .probe = xenbus_probe_backend,
+       .bus = {
+               .name  = "xen-backend",
+               .match = xenbus_match,
+       },
+       .dev = {
+               .bus_id = "xen-backend",
+       },
 };
 
 static int xenbus_dev_probe(struct device *_dev)
@@ -104,12 +188,13 @@
        return drv->remove(dev);
 }
 
-int xenbus_register_driver(struct xenbus_driver *drv)
+static int xenbus_register_driver(struct xenbus_driver *drv,
+                                 struct xen_bus_type *bus)
 {
        int err;
 
        drv->driver.name = drv->name;
-       drv->driver.bus = &xenbus_type;
+       drv->driver.bus = &bus->bus;
        drv->driver.owner = drv->owner;
        drv->driver.probe = xenbus_dev_probe;
        drv->driver.remove = xenbus_dev_remove;
@@ -120,6 +205,16 @@
        return err;
 }
 
+int xenbus_register_device(struct xenbus_driver *drv)
+{
+       return xenbus_register_driver(drv, &xenbus_frontend);
+}
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+       return xenbus_register_driver(drv, &xenbus_backend);
+}
+
 void xenbus_unregister_driver(struct xenbus_driver *drv)
 {
        down(&xenbus_lock);
@@ -130,52 +225,98 @@
 struct xb_find_info
 {
        struct xenbus_device *dev;
-       const char *busid;
+       const char *nodename;
 };
 
 static int cmp_dev(struct device *dev, void *data)
 {
+       struct xenbus_device *xendev = to_xenbus_device(dev);
        struct xb_find_info *info = data;
 
-       if (streq(dev->bus_id, info->busid)) {
-               info->dev = container_of(get_device(dev),
-                                        struct xenbus_device, dev);
+       if (streq(xendev->nodename, info->nodename)) {
+               info->dev = xendev;
+               get_device(dev);
                return 1;
        }
        return 0;
 }
 
-/* FIXME: device_find is fixed in 2.6.13-rc2 according to Greg KH --RR */
-struct xenbus_device *xenbus_device_find(const char *busid)
-{
-       struct xb_find_info info = { .dev = NULL, .busid = busid };
-
-       bus_for_each_dev(&xenbus_type, NULL, &info, cmp_dev);
+struct xenbus_device *xenbus_device_find(const char *nodename,
+                                        struct bus_type *bus)
+{
+       struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+       bus_for_each_dev(bus, NULL, &info, cmp_dev);
        return info.dev;
 }
 
+static int cleanup_dev(struct devic