# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID 81576d3d1ca891cdcd81fada9025b2279a974458
# Parent 99914b54f7bffc8c27757a1ac2bc7a0d97597ac8
# Parent 0608852073c86cfa432ac32cb9223531950be896
Merge.
diff -r 99914b54f7bf -r 81576d3d1ca8 Config.mk
--- a/Config.mk Thu Aug 18 18:40:02 2005
+++ b/Config.mk Fri Aug 19 18:19:28 2005
@@ -35,3 +35,11 @@
# Choose the best mirror to download linux kernel
KERNEL_REPO = http://www.kernel.org
+
+# ACM_USE_SECURITY_POLICY selects the security policy that Xen is built with.
+# Supported models are:
+# ACM_NULL_POLICY (ACM will not be built with this policy)
+# ACM_CHINESE_WALL_POLICY
+# ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
+# ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY
+ACM_USE_SECURITY_POLICY ?= ACM_NULL_POLICY
diff -r 99914b54f7bf -r 81576d3d1ca8 docs/src/user.tex
--- a/docs/src/user.tex Thu Aug 18 18:40:02 2005
+++ b/docs/src/user.tex Fri Aug 19 18:19:28 2005
@@ -1763,7 +1763,7 @@
physical address in the memory map will be ignored. This parameter
may be specified with a B, K, M or G suffix, representing bytes,
kilobytes, megabytes and gigabytes respectively. The
- default unit, if no suffix is specified, is bytes.
+ default unit, if no suffix is specified, is kilobytes.
\item [dom0\_mem=xxx ]
Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Fri Aug 19
18:19:28 2005
@@ -14,8 +14,7 @@
c-obj-y := semaphore.o vm86.o \
ptrace.o sys_i386.o \
- i387.o dmi_scan.o bootflag.o \
- doublefault.o
+ i387.o dmi_scan.o bootflag.o
s-obj-y :=
obj-y += cpu/
@@ -85,7 +84,7 @@
$(obj)/vsyscall-sysenter.o FORCE
$(call if_changed,syscall)
-c-link := init_task.o
+c-link :=
s-link := vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o
vsyscall.lds.o syscall_table.o
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Fri Aug 19
18:19:28 2005
@@ -569,7 +569,7 @@
for (va = gdt_descr->address, f = 0;
va < gdt_descr->address + gdt_descr->size;
va += PAGE_SIZE, f++) {
- frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ frames[f] = virt_to_mfn(va);
make_page_readonly((void *)va);
}
if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S Fri Aug 19 18:19:28 2005
@@ -136,9 +136,6 @@
ENTRY(empty_zero_page)
.org 0x2000
-ENTRY(swapper_pg_dir)
-
-.org 0x3000
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* 0x0b reserved */
@@ -190,10 +187,10 @@
.quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault
TSS */
.fill GDT_ENTRIES-32,8,0
-.org 0x4000
+.org 0x3000
ENTRY(default_ldt)
-.org 0x5000
+.org 0x4000
/*
* Real beginning of normal "text" segment
*/
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c Fri Aug 19
18:19:28 2005
@@ -80,7 +80,7 @@
t->io_bitmap_ptr = bitmap;
op.cmd = PHYSDEVOP_SET_IOBITMAP;
- op.u.set_iobitmap.bitmap = (unsigned long)bitmap;
+ op.u.set_iobitmap.bitmap = (char *)bitmap;
op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
HYPERVISOR_physdev_op(&op);
}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c Fri Aug 19 18:19:28 2005
@@ -198,7 +198,7 @@
{
struct mm_struct * mm = current->mm;
__u32 entry_1, entry_2, *lp;
- unsigned long mach_lp;
+ maddr_t mach_lp;
int error;
struct user_desc ldt_info;
@@ -245,7 +245,8 @@
/* Install the new entry ... */
install:
- error = HYPERVISOR_update_descriptor(mach_lp, entry_1, entry_2);
+ error = HYPERVISOR_update_descriptor(
+ mach_lp, (u64)entry_1 | ((u64)entry_2<<32));
out_unlock:
up(&mm->context.sem);
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Fri Aug 19
18:19:28 2005
@@ -115,20 +115,12 @@
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
- /* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
-
- /* We shouldn't have to disable interrupts while dead, but
- * some interrupts just don't seem to go away, and this makes
- * it "work" for testing purposes. */
/* Death loop */
while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
HYPERVISOR_yield();
- local_irq_disable();
__flush_tlb_all();
cpu_set(smp_processor_id(), cpu_online_map);
- local_irq_enable();
}
#else
static inline void play_dead(void)
@@ -156,12 +148,19 @@
rmb();
if (cpu_is_offline(cpu)) {
+ local_irq_disable();
+ /* Ack it. From this point on until
+ we get woken up, we're not allowed
+ to take any locks. In particular,
+ don't printk. */
+ __get_cpu_var(cpu_state) = CPU_DEAD;
#if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
/* Tell hypervisor to take vcpu down. */
HYPERVISOR_vcpu_down(cpu);
#endif
play_dead();
- }
+ local_irq_enable();
+ }
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
xen_idle();
@@ -523,16 +522,15 @@
* Load the per-thread Thread-Local Storage descriptor.
* This is load_TLS(next, cpu) with multicalls.
*/
-#define C(i) do { \
- if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
- next->tls_array[i].b != prev->tls_array[i].b)) { \
- mcl->op = __HYPERVISOR_update_descriptor; \
- mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu) \
- [GDT_ENTRY_TLS_MIN + i]); \
- mcl->args[1] = ((u32 *)&next->tls_array[i])[0]; \
- mcl->args[2] = ((u32 *)&next->tls_array[i])[1]; \
- mcl++; \
- } \
+#define C(i) do { \
+ if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
+ next->tls_array[i].b != prev->tls_array[i].b)) { \
+ mcl->op = __HYPERVISOR_update_descriptor; \
+ *(u64 *)&mcl->args[0] = virt_to_machine( \
+ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
+ *(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \
+ mcl++; \
+ } \
} while (0)
C(0); C(1); C(2);
#undef C
@@ -549,7 +547,7 @@
iobmp_op.cmd =
PHYSDEVOP_SET_IOBITMAP;
iobmp_op.u.set_iobitmap.bitmap =
- (unsigned long)next->io_bitmap_ptr;
+ (char *)next->io_bitmap_ptr;
iobmp_op.u.set_iobitmap.nr_ports =
next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
mcl->op = __HYPERVISOR_physdev_op;
@@ -791,3 +789,10 @@
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+
+/* Empty stand-in for non-SMP builds; reboot.c takes the address of _restore_vcpu, so the symbol must exist. */
+#ifndef CONFIG_X86_SMP
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Aug 19 18:19:28 2005
@@ -1604,11 +1604,10 @@
for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
{
pfn_to_mfn_frame_list[j] =
- virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+ virt_to_mfn(&phys_to_machine_mapping[i]);
}
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
- virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
-
+ virt_to_mfn(pfn_to_mfn_frame_list);
/*
* NOTE: at this point the bootmem allocator is fully available.
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Aug 19
18:19:28 2005
@@ -904,7 +904,7 @@
for (va = cpu_gdt_descr[cpu].address, f = 0;
va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
va += PAGE_SIZE, f++) {
- ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ ctxt.gdt_frames[f] = virt_to_mfn(va);
make_page_readonly((void *)va);
}
ctxt.gdt_ents = cpu_gdt_descr[cpu].size / 8;
@@ -920,7 +920,7 @@
ctxt.failsafe_callback_cs = __KERNEL_CS;
ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
- ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
+ ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT;
boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
printk("boot error: %ld\n", boot_error);
@@ -1616,3 +1616,21 @@
smp_intr_init();
local_setup_timer_irq();
}
+
+DECLARE_PER_CPU(int, timer_irq);
+
+void _restore_vcpu(void)
+{
+ int cpu = smp_processor_id();
+ extern atomic_t vcpus_rebooting;
+
+ /* We are the first thing the vcpu runs when it comes back (its eip
+ is pointed here by restore_vcpu_context()), and we re-bind this
+ cpu's timer and debug VIRQs and its IPIs to their existing irqs.
+ When we return, the vcpu's idle loop will start up again. */
+ _bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
+ _bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
+ _bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
+ _bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu)
);
+ atomic_dec(&vcpus_rebooting); /* lets the spin-wait in restore_vcpu_context() finish */
+}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Fri Aug 19
18:19:28 2005
@@ -94,9 +94,6 @@
iotlb_nslabs = simple_strtoul(str, &str, 0) <<
(20 - IO_TLB_SHIFT);
iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
- /* Round up to power of two (xen_create_contiguous_region). */
- while (iotlb_nslabs & (iotlb_nslabs-1))
- iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
}
if (*str == ',')
++str;
@@ -123,9 +120,6 @@
if (!iotlb_nslabs) {
iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
- /* Round up to power of two (xen_create_contiguous_region). */
- while (iotlb_nslabs & (iotlb_nslabs-1))
- iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1);
}
bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT);
@@ -135,10 +129,14 @@
*/
iotlb_virt_start = alloc_bootmem_low_pages(bytes);
if (!iotlb_virt_start)
- panic("Cannot allocate SWIOTLB buffer");
-
- xen_create_contiguous_region(
- (unsigned long)iotlb_virt_start, get_order(bytes));
+ panic("Cannot allocate SWIOTLB buffer!\n"
+ "Use dom0_mem Xen boot parameter to reserve\n"
+ "some DMA memory (e.g., dom0_mem=-128M).\n");
+
+ for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE)
+ xen_create_contiguous_region(
+ (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT),
+ get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT));
iotlb_virt_end = iotlb_virt_start + bytes;
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c Fri Aug 19 18:19:28 2005
@@ -745,7 +745,7 @@
#endif
/* Dynamically-mapped IRQ. */
-static DEFINE_PER_CPU(int, timer_irq);
+DEFINE_PER_CPU(int, timer_irq);
static struct irqaction irq_timer = {
timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Aug 19 18:19:28 2005
@@ -342,11 +342,15 @@
extern void __init remap_numa_kva(void);
#endif
+pgd_t *swapper_pg_dir;
+
static void __init pagetable_init (void)
{
unsigned long vaddr;
- pgd_t *pgd_base = swapper_pg_dir;
- pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
+ pgd_t *pgd_base = (pgd_t *)xen_start_info.pt_base;
+
+ swapper_pg_dir = pgd_base;
+ init_mm.pgd = pgd_base;
#ifdef CONFIG_X86_PAE
int i;
@@ -366,44 +370,6 @@
__PAGE_KERNEL |= _PAGE_GLOBAL;
__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
}
-
- /*
- * Switch to proper mm_init page directory. Initialise from the current
- * page directory, write-protect the new page directory, then switch to
- * it. We clean up by write-enabling and then freeing the old page dir.
- */
-#ifndef CONFIG_X86_PAE
- memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
- make_page_readonly(pgd_base);
- xen_pgd_pin(__pa(pgd_base));
- load_cr3(pgd_base);
- xen_pgd_unpin(__pa(old_pgd));
- make_page_writable(old_pgd);
- __flush_tlb_all();
- free_bootmem(__pa(old_pgd), PAGE_SIZE);
-#else
- {
- pud_t *old_pud = pud_offset(old_pgd+3, PAGE_OFFSET);
- pmd_t *old_pmd = pmd_offset(old_pud, PAGE_OFFSET);
- pmd_t *new_pmd = alloc_bootmem_low_pages(PAGE_SIZE);
-
- memcpy(new_pmd, old_pmd, PAGE_SIZE);
- memcpy(pgd_base, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
- set_pgd(&pgd_base[3], __pgd(__pa(new_pmd) | _PAGE_PRESENT));
-
- make_page_readonly(new_pmd);
- make_page_readonly(pgd_base);
- xen_pgd_pin(__pa(pgd_base));
- load_cr3(pgd_base);
- xen_pgd_unpin(__pa(old_pgd));
- make_page_writable(old_pgd);
- make_page_writable(old_pmd);
- __flush_tlb_all();
-
- free_bootmem(__pa(old_pgd), PAGE_SIZE);
- free_bootmem(__pa(old_pmd), PAGE_SIZE);
- }
-#endif
init_mm.context.pinned = 1;
kernel_physical_mapping_init(pgd_base);
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Fri Aug 19 18:19:28 2005
@@ -306,7 +306,7 @@
{
mmu_update_t **v = (mmu_update_t **)data;
- (*v)->ptr = ((physaddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+ (*v)->ptr = ((maddr_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
(*v)++;
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Fri Aug 19 18:19:28 2005
@@ -170,7 +170,7 @@
__flush_tlb_one(vaddr);
}
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t
flags)
+void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c Fri Aug 19 18:19:28 2005
@@ -144,7 +144,7 @@
vcpu_info_t *vcpu_info = &s->vcpu_data[cpu];
vcpu_info->evtchn_upcall_pending = 0;
-
+
/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
while ( l1 != 0 )
@@ -158,9 +158,9 @@
l2 &= ~(1 << l2i);
port = (l1i << 5) + l2i;
- if ( (irq = evtchn_to_irq[port]) != -1 )
+ if ( (irq = evtchn_to_irq[port]) != -1 ) {
do_IRQ(irq, regs);
- else
+ } else
evtchn_device_upcall(port);
}
}
@@ -243,6 +243,74 @@
}
spin_unlock(&irq_mapping_update_lock);
+}
+
+/* This is only used when restoring a vcpu after an xm save. The ipi is
+ expected to have been bound before we suspended, so all of the
+ xenolinux state is set up; we only need to restore the Xen side of
+ things. The irq number has to be the same, but the evtchn number can
+ change. */
+void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ op.cmd = EVTCHNOP_bind_ipi;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
+ evtchn = op.u.bind_ipi.port;
+ /* NOTE(review): "<0>" is the highest printk priority; both printks here look like leftover debug output. */
+ printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
+ ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
+ evtchn);
+ /* Forget the pre-suspend evtchn of this irq (assumes irq_to_evtchn[irq] still holds a valid port -- TODO confirm). */
+ evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+ irq_to_evtchn[irq] = -1;
+ /* Point the unchanged irq at the evtchn Xen just handed back. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
+ evtchn_to_irq[evtchn]);
+ per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
+
+ bind_evtchn_to_cpu(evtchn, vcpu);
+
+ spin_unlock(&irq_mapping_update_lock);
+ /* Clear the new port's mask bit and its pending bit in the shared info page. */
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
+}
+/* VIRQ flavour of the post-restore re-bind: get a new evtchn for virq from Xen and attach it to the given irq. */
+void _bind_virq_to_irq(int virq, int cpu, int irq)
+{
+ evtchn_op_t op;
+ int evtchn;
+
+ spin_lock(&irq_mapping_update_lock);
+
+ op.cmd = EVTCHNOP_bind_virq;
+ op.u.bind_virq.virq = virq;
+ if ( HYPERVISOR_event_channel_op(&op) != 0 )
+ panic("Failed to bind virtual IRQ %d\n", virq);
+ evtchn = op.u.bind_virq.port;
+ /* Forget the evtchn previously recorded for this irq (assumes irq_to_evtchn[irq] is a valid port -- TODO confirm). */
+ evtchn_to_irq[irq_to_evtchn[irq]] = -1;
+ irq_to_evtchn[irq] = -1;
+ /* Install the two-way mapping between the new evtchn and the irq. */
+ evtchn_to_irq[evtchn] = irq;
+ irq_to_evtchn[irq] = evtchn;
+
+ per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ bind_evtchn_to_cpu(evtchn, cpu);
+
+ spin_unlock(&irq_mapping_update_lock);
+ /* Clear the new port's mask bit and its pending bit in the shared info page. */
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
+ clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
}
int bind_ipi_to_irq(int ipi)
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 19 18:19:28 2005
@@ -16,6 +16,8 @@
#include <asm-xen/queues.h>
#include <asm-xen/xenbus.h>
#include <asm-xen/ctrl_if.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
#define SHUTDOWN_INVALID -1
#define SHUTDOWN_POWEROFF 0
@@ -58,20 +60,74 @@
/* Ignore multiple shutdown requests. */
static int shutting_down = SHUTDOWN_INVALID;
-static void __do_suspend(void)
+#ifndef CONFIG_HOTPLUG_CPU
+#define cpu_down(x) (-EOPNOTSUPP)
+#define cpu_up(x) (-EOPNOTSUPP)
+#endif
+/* Pickle vcpu's state into ctxt via the hypervisor (panics on failure), then make its addresses machine-independent. */
+static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int r;
+ int gdt_pages;
+ r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
+ if (r != 0)
+ panic("pickling vcpu %d -> %d!\n", vcpu, r);
+
+ /* Translate from machine to physical addresses where necessary,
+ so that they can be translated to our new machine address space
+ after resume. libxc is responsible for doing this to vcpu0,
+ but we do it to the others. */
+ gdt_pages = (ctxt->gdt_ents + 511) / 512; /* round up; 512 entries per frame -- presumably 8-byte descriptors in 4K pages */
+ ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
+ for (r = 0; r < gdt_pages; r++)
+ ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
+}
+
+void _restore_vcpu(void); /* per-arch entry point for a restored vcpu; every definition takes no arguments */
+
+atomic_t vcpus_rebooting;
+/* Restart a vcpu from ctxt (as filled by save_vcpu_context), entering via _restore_vcpu. Returns 0, or -1 if the boot hypercall fails. */
+static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
+{
+ int r;
+ int gdt_pages = (ctxt->gdt_ents + 511) / 512;
+
+ /* This is kind of a hack, and implicitly relies on the fact that
+ the vcpu stops in a place where all of the call clobbered
+ registers are already dead. */
+ ctxt->user_regs.esp -= 4; /* make room on the vcpu's stack for a return address... */
+ ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip; /* ...which is the eip it was stopped at */
+ ctxt->user_regs.eip = (unsigned long)_restore_vcpu; /* run _restore_vcpu first */
+
+ /* De-canonicalise. libxc handles this for vcpu 0, but we need
+ to do it for the other vcpus. */
+ ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
+ for (r = 0; r < gdt_pages; r++)
+ ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
+
+ atomic_set(&vcpus_rebooting, 1);
+ r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
+ if (r != 0) {
+ printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
+ return -1;
+ }
+
+ /* Wait for the new vcpu to come up (the i386 _restore_vcpu() does the atomic_dec) before doing anything with it
+ or starting the next one. NOTE(review): the x86_64 SMP _restore_vcpu() stub never decrements -- confirm. */
+ while (atomic_read(&vcpus_rebooting))
+ barrier();
+
+ return 0;
+}
+
+static int __do_suspend(void *ignore)
{
int i, j;
suspend_record_t *suspend_record;
+ static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
/* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
/* XXX SMH: yes it would :-( */
-#ifdef CONFIG_XEN_BLKDEV_FRONTEND
- extern void blkdev_suspend(void);
- extern void blkdev_resume(void);
-#else
-#define blkdev_suspend() do{}while(0)
-#define blkdev_resume() do{}while(0)
-#endif
#ifdef CONFIG_XEN_NETDEV_FRONTEND
extern void netif_suspend(void);
@@ -104,13 +160,63 @@
extern unsigned long max_pfn;
extern unsigned int *pfn_to_mfn_frame_list;
+ cpumask_t prev_online_cpus, prev_present_cpus;
+ int err = 0;
+
+ BUG_ON(smp_processor_id() != 0);
+ BUG_ON(in_interrupt());
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
+ if (num_online_cpus() > 1) {
+ printk(KERN_WARNING "Can't suspend SMP guests without
CONFIG_HOTPLUG_CPU\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
if ( suspend_record == NULL )
goto out;
+ /* Take all of the other cpus offline. We need to be careful not
+ to get preempted between the final test for num_online_cpus()
+ == 1 and disabling interrupts, since otherwise userspace could
+ bring another cpu online, and then we'd be stuffed. At the
+ same time, cpu_down can reschedule, so we need to enable
+ preemption while doing that. This kind of sucks, but should be
+ correct. */
+ /* (We don't need to worry about other cpus bringing stuff up,
+ since by the time num_online_cpus() == 1, there aren't any
+ other cpus) */
+ cpus_clear(prev_online_cpus);
+ preempt_disable();
+ while (num_online_cpus() > 1) {
+ preempt_enable();
+ for_each_online_cpu(i) {
+ if (i == 0)
+ continue;
+ err = cpu_down(i);
+ if (err != 0) {
+ printk(KERN_CRIT "Failed to take all CPUs down: %d.\n", err);
+ goto out_reenable_cpus;
+ }
+ cpu_set(i, prev_online_cpus);
+ }
+ preempt_disable();
+ }
+
suspend_record->nr_pfns = max_pfn; /* final number of pfns */
__cli();
+
+ preempt_enable();
+
+ cpus_clear(prev_present_cpus);
+ for_each_present_cpu(i) {
+ if (i == 0)
+ continue;
+ save_vcpu_context(i, &suspended_cpu_records[i]);
+ cpu_set(i, prev_present_cpus);
+ }
#ifdef __i386__
mm_pin_all();
@@ -119,8 +225,6 @@
netif_suspend();
- blkdev_suspend();
-
time_suspend();
#ifdef CONFIG_SMP
@@ -141,7 +245,9 @@
memcpy(&suspend_record->resume_info, &xen_start_info,
sizeof(xen_start_info));
- HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
+ /* We'll stop somewhere inside this hypercall. When it returns,
+ we'll start resuming after the restore. */
+ HYPERVISOR_suspend(virt_to_mfn(suspend_record));
shutting_down = SHUTDOWN_INVALID;
@@ -157,10 +263,10 @@
for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
{
pfn_to_mfn_frame_list[j] =
- virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT;
+ virt_to_mfn(&phys_to_machine_mapping[i]);
}
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
- virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
+ virt_to_mfn(pfn_to_mfn_frame_list);
gnttab_resume();
@@ -176,17 +282,30 @@
time_resume();
- blkdev_resume();
-
netif_resume();
usbif_resume();
+ for_each_cpu_mask(i, prev_present_cpus) {
+ restore_vcpu_context(i, &suspended_cpu_records[i]);
+ }
+
__sti();
+
+ out_reenable_cpus:
+ for_each_cpu_mask(i, prev_online_cpus) {
+ j = cpu_up(i);
+ if (j != 0) {
+ printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
+ i, j);
+ err = j;
+ }
+ }
out:
if ( suspend_record != NULL )
free_page((unsigned long)suspend_record);
+ return err;
}
static int shutdown_process(void *__unused)
@@ -233,6 +352,18 @@
return 0;
}
+/* Create a kthread running f(arg), bind it to cpu and wake it; returns the task, or the ERR_PTR from kthread_create(). */
+static struct task_struct *kthread_create_on_cpu(int (*f)(void *arg),
+		void *arg, const char *name, int cpu)
+{
+	struct task_struct *p = kthread_create(f, arg, name);
+	if (!IS_ERR(p)) {	/* on failure p is an ERR_PTR: never bind or wake that */
+		kthread_bind(p, cpu);
+		wake_up_process(p);
+	}
+	return p;
+}
+
static void __shutdown_handler(void *unused)
{
int err;
@@ -245,7 +376,7 @@
}
else
{
- __do_suspend();
+ kthread_create_on_cpu(__do_suspend, NULL, "suspender", 0);
}
}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c Fri Aug 19
18:19:28 2005
@@ -453,8 +453,8 @@
#define C(i) do { \
if (unlikely(next->tls_array[i] != prev->tls_array[i])) { \
mcl->op = __HYPERVISOR_update_descriptor; \
- mcl->args[0] = virt_to_machine(&get_cpu_gdt_table(cpu) \
- [GDT_ENTRY_TLS_MIN + i]); \
+ mcl->args[0] = virt_to_machine( \
+ &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
mcl->args[1] = next->tls_array[i]; \
mcl++; \
} \
@@ -474,7 +474,7 @@
iobmp_op.cmd =
PHYSDEVOP_SET_IOBITMAP;
iobmp_op.u.set_iobitmap.bitmap =
- (unsigned long)next->io_bitmap_ptr;
+ (char *)next->io_bitmap_ptr;
iobmp_op.u.set_iobitmap.nr_ports =
next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
mcl->op = __HYPERVISOR_physdev_op;
@@ -743,3 +743,9 @@
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
+/* Empty stand-in for non-SMP builds; the SMP definition is the one added to smpboot.c. */
+#ifndef CONFIG_SMP
+void _restore_vcpu(void)
+{
+}
+#endif
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Fri Aug 19
18:19:28 2005
@@ -795,7 +795,7 @@
for ( i=0, j=0; i < end_pfn; i+=(PAGE_SIZE/sizeof(unsigned
long)), j++ )
{
pfn_to_mfn_frame_list[j] =
- virt_to_machine(&phys_to_machine_mapping[i]) >>
PAGE_SHIFT;
+ virt_to_mfn(&phys_to_machine_mapping[i]);
}
}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Fri Aug 19
18:19:28 2005
@@ -139,7 +139,7 @@
for (va = gdt_descr->address, f = 0;
va < gdt_descr->address + gdt_descr->size;
va += PAGE_SIZE, f++) {
- frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ frames[f] = virt_to_mfn(va);
make_page_readonly((void *)va);
}
if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
@@ -280,19 +280,16 @@
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
}
-#endif
cpu_gdt_descr[cpu].size = GDT_SIZE;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
- cpu_gdt_init(&cpu_gdt_descr[cpu]);
-
-#ifndef CONFIG_XEN
memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES
* 8);
-
#else
memcpy(me->thread.tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
GDT_ENTRY_TLS_ENTRIES * 8);
+
+ cpu_gdt_init(&cpu_gdt_descr[cpu]);
#endif
/*
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c Fri Aug 19
18:19:28 2005
@@ -780,7 +780,7 @@
for (va = cpu_gdt_descr[cpu].address, f = 0;
va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
va += PAGE_SIZE, f++) {
- ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ ctxt.gdt_frames[f] = virt_to_mfn(va);
make_page_readonly((void *)va);
}
ctxt.gdt_ents = GDT_ENTRIES;
@@ -795,7 +795,7 @@
ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
ctxt.syscall_callback_eip = (unsigned long)system_call;
- ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+ ctxt.ctrlreg[3] = virt_to_mfn(init_level4_pgt) << PAGE_SHIFT;
boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
@@ -1286,4 +1286,10 @@
smp_intr_init();
local_setup_timer_irq();
}
-#endif
+
+void _restore_vcpu(void)
+{
+ /* XXX need to write this; the i386 version re-binds the timer/debug VIRQs and IPIs and decrements vcpus_rebooting */
+}
+
+#endif
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Aug 19 18:19:28 2005
@@ -742,7 +742,7 @@
set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
else
__set_fixmap(FIX_ISAMAP_BEGIN - i,
- virt_to_machine(empty_zero_page),
+ virt_to_mfn(empty_zero_page) <<
PAGE_SHIFT,
PAGE_KERNEL_RO);
}
#endif
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blkback/Makefile
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/Makefile Fri Aug 19 18:19:28 2005
@@ -1,2 +1,2 @@
-obj-y := blkback.o control.o interface.o vbd.o
+obj-y := blkback.o xenbus.o interface.o vbd.o
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Aug 19
18:19:28 2005
@@ -104,7 +104,6 @@
#endif
static int do_block_io_op(blkif_t *blkif, int max_to_do);
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
static void make_response(blkif_t *blkif, unsigned long id,
unsigned short op, int st);
@@ -349,10 +348,6 @@
dispatch_rw_block_io(blkif, req);
break;
- case BLKIF_OP_PROBE:
- dispatch_probe(blkif, req);
- break;
-
default:
DPRINTK("error: unknown block io operation [%d]\n",
req->operation);
@@ -363,66 +358,6 @@
blk_ring->req_cons = i;
return more_to_do;
-}
-
-static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
-{
- int rsp = BLKIF_RSP_ERROR;
- int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
-
- /* We expect one buffer only. */
- if ( unlikely(req->nr_segments != 1) )
- goto out;
-
- /* Make sure the buffer is page-sized. */
- if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
- (blkif_last_sect(req->frame_and_sects[0]) != ((PAGE_SIZE/512)-1)) )
- goto out;
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- {
- struct gnttab_map_grant_ref map;
-
- map.host_addr = MMAP_VADDR(pending_idx, 0);
- map.flags = GNTMAP_host_map;
- map.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
- map.dom = blkif->domid;
-
- if ( unlikely(HYPERVISOR_grant_table_op(
- GNTTABOP_map_grant_ref, &map, 1)))
- BUG();
-
- if ( map.handle < 0 )
- goto out;
-
- pending_handle(pending_idx, 0) = map.handle;
- }
-#else /* else CONFIG_XEN_BLKDEV_GRANT */
-
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- /* Grab the real frontend out of the probe message. */
- if (req->frame_and_sects[1] == BLKTAP_COOKIE)
- blkif->is_blktap = 1;
-#endif
-
-
- if ( HYPERVISOR_update_va_mapping_otherdomain(
- MMAP_VADDR(pending_idx, 0),
- pfn_pte_ma(req->frame_and_sects[0] >> PAGE_SHIFT, PAGE_KERNEL),
-#ifdef CONFIG_XEN_BLKDEV_TAP_BE
- 0, (blkif->is_blktap ? ID_TO_DOM(req->id) : blkif->domid) ) )
-#else
- 0, blkif->domid) )
-#endif
- goto out;
-#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
-
- rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
- PAGE_SIZE / sizeof(vdisk_t));
-
- out:
- fast_flush_area(pending_idx, 1);
- make_response(blkif, req->id, req->operation, rsp);
}
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
@@ -460,7 +395,7 @@
goto bad_descriptor;
}
- preq.dev = req->device;
+ preq.dev = req->handle;
preq.sector_number = req->sector_number;
preq.nr_sects = 0;
@@ -730,8 +665,8 @@
0, SLAB_HWCACHE_ALIGN, NULL, NULL);
#endif
- blkif_ctrlif_init();
-
+ blkif_xenbus_init();
+
#ifdef CONFIG_XEN_BLKDEV_GRANT
memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES );
printk(KERN_ALERT "Blkif backend is using grant tables.\n");
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blkback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Fri Aug 19 18:19:28 2005
@@ -13,7 +13,6 @@
#include <asm/io.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
-#include <asm-xen/ctrl_if.h>
#include <asm-xen/evtchn.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/io/blkif.h>
@@ -47,6 +46,7 @@
/* Physical parameters of the comms window. */
unsigned long shmem_frame;
unsigned int evtchn;
+ unsigned int remote_evtchn;
/* Comms information. */
blkif_back_ring_t blk_ring;
/* VBDs attached to this interface. */
@@ -71,7 +71,7 @@
struct work_struct work;
#ifdef CONFIG_XEN_BLKDEV_GRANT
u16 shmem_handle;
- memory_t shmem_vaddr;
+ unsigned long shmem_vaddr;
grant_ref_t shmem_ref;
#endif
} blkif_t;
@@ -81,17 +81,29 @@
void blkif_connect(blkif_be_connect_t *connect);
int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
void blkif_disconnect_complete(blkif_t *blkif);
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
+blkif_t *blkif_find(domid_t domid);
+void free_blkif(blkif_t *blkif);
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
+
#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
#define blkif_put(_b) \
do { \
if ( atomic_dec_and_test(&(_b)->refcnt) ) \
- blkif_disconnect_complete(_b); \
+ free_blkif(_b); \
} while (0)
-void vbd_create(blkif_be_vbd_create_t *create);
+struct vbd;
+void vbd_free(blkif_t *blkif, struct vbd *vbd);
+
+/* Creates inactive vbd. */
+struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, blkif_pdev_t pdevice, int readonly);
+int vbd_is_active(struct vbd *vbd);
+void vbd_activate(blkif_t *blkif, struct vbd *vbd);
+
+unsigned long vbd_size(struct vbd *vbd);
+unsigned int vbd_info(struct vbd *vbd);
+unsigned long vbd_secsize(struct vbd *vbd);
void vbd_destroy(blkif_be_vbd_destroy_t *delete);
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
void destroy_all_vbds(blkif_t *blkif);
struct phys_req {
@@ -104,9 +116,10 @@
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
void blkif_interface_init(void);
-void blkif_ctrlif_init(void);
void blkif_deschedule(blkif_t *blkif);
+
+void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Fri Aug 19 18:19:28 2005
@@ -7,24 +7,135 @@
*/
#include "common.h"
+#include <asm-xen/ctrl_if.h>
+#include <asm-xen/evtchn.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#define VMALLOC_VMADDR(x) ((unsigned long)(x))
#endif
#define BLKIF_HASHSZ 1024
-#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1))
+#define BLKIF_HASH(_d) (((int)(_d))&(BLKIF_HASHSZ-1))
static kmem_cache_t *blkif_cachep;
static blkif_t *blkif_hash[BLKIF_HASHSZ];
-blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
-{
- blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif != NULL) &&
- ((blkif->domid != domid) || (blkif->handle != handle)) )
+blkif_t *blkif_find(domid_t domid)
+{
+ blkif_t *blkif = blkif_hash[BLKIF_HASH(domid)];
+
+ while (blkif) {
+ if (blkif->domid == domid) {
+ blkif_get(blkif);
+ return blkif;
+ }
blkif = blkif->hash_next;
+ }
+
+ blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+ if (!blkif)
+ return ERR_PTR(-ENOMEM);
+
+ memset(blkif, 0, sizeof(*blkif));
+ blkif->domid = domid;
+ blkif->status = DISCONNECTED;
+ spin_lock_init(&blkif->vbd_lock);
+ spin_lock_init(&blkif->blk_ring_lock);
+ atomic_set(&blkif->refcnt, 1);
+
+ blkif->hash_next = blkif_hash[BLKIF_HASH(domid)];
+ blkif_hash[BLKIF_HASH(domid)] = blkif;
return blkif;
+}
+
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
+ unsigned long shared_page)
+{
+ return direct_remap_area_pages(&init_mm, localaddr,
+ shared_page<<PAGE_SHIFT, PAGE_SIZE,
+ __pgprot(_KERNPG_TABLE), blkif->domid);
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+}
+#else
+static int map_frontend_page(blkif_t *blkif, unsigned long localaddr,
+ unsigned long shared_page)
+{
+ struct gnttab_map_grant_ref op;
+ op.host_addr = localaddr;
+ op.flags = GNTMAP_host_map;
+ op.ref = shared_page;
+ op.dom = blkif->domid;
+
+ BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
+
+ if (op.handle < 0) {
+ DPRINTK(" Grant table operation failure !\n");
+ return op.handle;
+ }
+
+ blkif->shmem_ref = shared_page;
+ blkif->shmem_handle = op.handle;
+ blkif->shmem_vaddr = localaddr;
+ return 0;
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+ struct gnttab_unmap_grant_ref op;
+
+ op.host_addr = blkif->shmem_vaddr;
+ op.handle = blkif->shmem_handle;
+ op.dev_bus_addr = 0;
+ BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
+}
+#endif /* CONFIG_XEN_BLKDEV_GRANT */
+
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
+{
+ struct vm_struct *vma;
+ blkif_sring_t *sring;
+ evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+ int err;
+
+ BUG_ON(blkif->remote_evtchn);
+
+ if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
+ return -ENOMEM;
+
+ err = map_frontend_page(blkif, VMALLOC_VMADDR(vma->addr), shared_page);
+ if (err) {
+ vfree(vma->addr);
+ return err;
+ }
+
+ op.u.bind_interdomain.dom1 = DOMID_SELF;
+ op.u.bind_interdomain.dom2 = blkif->domid;
+ op.u.bind_interdomain.port1 = 0;
+ op.u.bind_interdomain.port2 = evtchn;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ unmap_frontend_page(blkif);
+ vfree(vma->addr);
+ return err;
+ }
+
+ blkif->evtchn = op.u.bind_interdomain.port1;
+ blkif->remote_evtchn = evtchn;
+
+ sring = (blkif_sring_t *)vma->addr;
+ SHARED_RING_INIT(sring);
+ BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+
+ bind_evtchn_to_irqhandler(blkif->evtchn, blkif_be_int, 0, "blkif-backend",
+ blkif);
+ blkif->status = CONNECTED;
+ blkif->shmem_frame = shared_page;
+
+ return 0;
}
static void __blkif_disconnect_complete(void *arg)
@@ -32,21 +143,13 @@
blkif_t *blkif = (blkif_t *)arg;
ctrl_msg_t cmsg;
blkif_be_disconnect_t disc;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- struct gnttab_unmap_grant_ref op;
-#endif
/*
* These can't be done in blkif_disconnect() because at that point there
* may be outstanding requests at the disc whose asynchronous responses
* must still be notified to the remote driver.
*/
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- op.host_addr = blkif->shmem_vaddr;
- op.handle = blkif->shmem_handle;
- op.dev_bus_addr = 0;
- BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1));
-#endif
+ unmap_frontend_page(blkif);
vfree(blkif->blk_ring.sring);
/* Construct the deferred response message. */
@@ -81,200 +184,35 @@
schedule_work(&blkif->work);
}
-void blkif_create(blkif_be_create_t *create)
-{
- domid_t domid = create->domid;
- unsigned int handle = create->blkif_handle;
- blkif_t **pblkif, *blkif;
-
- if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL )
+void free_blkif(blkif_t *blkif)
+{
+ blkif_t **pblkif;
+ evtchn_op_t op = { .cmd = EVTCHNOP_close };
+
+ op.u.close.port = blkif->evtchn;
+ op.u.close.dom = DOMID_SELF;
+ HYPERVISOR_event_channel_op(&op);
+ op.u.close.port = blkif->remote_evtchn;
+ op.u.close.dom = blkif->domid;
+ HYPERVISOR_event_channel_op(&op);
+
+ if (blkif->evtchn)
+ unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
+
+ if (blkif->blk_ring.sring) {
+ unmap_frontend_page(blkif);
+ vfree(blkif->blk_ring.sring);
+ }
+
+ pblkif = &blkif_hash[BLKIF_HASH(blkif->domid)];
+ while ( *pblkif != blkif )
{
- DPRINTK("Could not create blkif: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- memset(blkif, 0, sizeof(*blkif));
- blkif->domid = domid;
- blkif->handle = handle;
- blkif->status = DISCONNECTED;
- spin_lock_init(&blkif->vbd_lock);
- spin_lock_init(&blkif->blk_ring_lock);
- atomic_set(&blkif->refcnt, 0);
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( *pblkif != NULL )
- {
- if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
- {
- DPRINTK("Could not create blkif: already exists\n");
- create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
- kmem_cache_free(blkif_cachep, blkif);
- return;
- }
+ BUG_ON(!*pblkif);
pblkif = &(*pblkif)->hash_next;
}
-
- blkif->hash_next = *pblkif;
- *pblkif = blkif;
-
- DPRINTK("Successfully created blkif\n");
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_destroy(blkif_be_destroy_t *destroy)
-{
- domid_t domid = destroy->domid;
- unsigned int handle = destroy->blkif_handle;
- blkif_t **pblkif, *blkif;
-
- pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
- while ( (blkif = *pblkif) != NULL )
- {
- if ( (blkif->domid == domid) && (blkif->handle == handle) )
- {
- if ( blkif->status != DISCONNECTED )
- goto still_connected;
- goto destroy;
- }
- pblkif = &blkif->hash_next;
- }
-
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
-
- still_connected:
- destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- return;
-
- destroy:
*pblkif = blkif->hash_next;
destroy_all_vbds(blkif);
kmem_cache_free(blkif_cachep, blkif);
- destroy->status = BLKIF_BE_STATUS_OKAY;
-}
-
-void blkif_connect(blkif_be_connect_t *connect)
-{
- domid_t domid = connect->domid;
- unsigned int handle = connect->blkif_handle;
- unsigned int evtchn = connect->evtchn;
- unsigned long shmem_frame = connect->shmem_frame;
- struct vm_struct *vma;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int ref = connect->shmem_ref;
-#else
- pgprot_t prot;
- int error;
-#endif
- blkif_t *blkif;
- blkif_sring_t *sring;
-
- blkif = blkif_find_by_handle(domid, handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n",
- connect->domid, connect->blkif_handle);
- connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
- {
- connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
-#ifndef CONFIG_XEN_BLKDEV_GRANT
- prot = __pgprot(_KERNPG_TABLE);
- error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
- shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
- prot, domid);
- if ( error != 0 )
- {
- if ( error == -ENOMEM )
- connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- else if ( error == -EFAULT )
- connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- else
- connect->status = BLKIF_BE_STATUS_ERROR;
- vfree(vma->addr);
- return;
- }
-#else
- { /* Map: Use the Grant table reference */
- struct gnttab_map_grant_ref op;
- op.host_addr = VMALLOC_VMADDR(vma->addr);
- op.flags = GNTMAP_host_map;
- op.ref = ref;
- op.dom = domid;
-
- BUG_ON( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) );
-
- handle = op.handle;
-
- if (op.handle < 0) {
- DPRINTK(" Grant table operation failure !\n");
- connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
- vfree(vma->addr);
- return;
- }
-
- blkif->shmem_ref = ref;
- blkif->shmem_handle = handle;
- blkif->shmem_vaddr = VMALLOC_VMADDR(vma->addr);
- }
-#endif
-
- if ( blkif->status != DISCONNECTED )
- {
- connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
- vfree(vma->addr);
- return;
- }
- sring = (blkif_sring_t *)vma->addr;
- SHARED_RING_INIT(sring);
- BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
-
- blkif->evtchn = evtchn;
- blkif->shmem_frame = shmem_frame;
- blkif->status = CONNECTED;
- blkif_get(blkif);
-
- bind_evtchn_to_irqhandler(
- blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
-
- connect->status = BLKIF_BE_STATUS_OKAY;
-}
-
-int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
-{
- domid_t domid = disconnect->domid;
- unsigned int handle = disconnect->blkif_handle;
- blkif_t *blkif;
-
- blkif = blkif_find_by_handle(domid, handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("blkif_disconnect attempted for non-existent blkif"
- " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle);
- disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return 1; /* Caller will send response error message. */
- }
-
- if ( blkif->status == CONNECTED )
- {
- blkif->status = DISCONNECTING;
- blkif->disconnect_rspid = rsp_id;
- wmb(); /* Let other CPUs see the status change. */
- unbind_evtchn_from_irqhandler(blkif->evtchn, blkif);
- blkif_deschedule(blkif);
- blkif_put(blkif);
- return 0; /* Caller should not send response message. */
- }
-
- disconnect->status = BLKIF_BE_STATUS_OKAY;
- return 1;
}
void __init blkif_interface_init(void)
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c Fri Aug 19 18:19:28 2005
@@ -11,13 +11,16 @@
*/
#include "common.h"
+#include <asm-xen/xenbus.h>
struct vbd {
- blkif_vdev_t vdevice; /* what the domain refers to this vbd as */
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
unsigned char readonly; /* Non-zero -> read-only */
unsigned char type; /* VDISK_xxx */
blkif_pdev_t pdevice; /* phys device that this vbd maps to */
struct block_device *bdev;
+
+ int active;
rb_node_t rb; /* for linking into R-B tree lookup struct */
};
@@ -33,140 +36,128 @@
#define bdev_hardsect_size(_b) 512
#endif
-void vbd_create(blkif_be_vbd_create_t *create)
+unsigned long vbd_size(struct vbd *vbd)
+{
+ return vbd_sz(vbd);
+}
+
+unsigned int vbd_info(struct vbd *vbd)
+{
+ return vbd->type | (vbd->readonly?VDISK_READONLY:0);
+}
+
+unsigned long vbd_secsize(struct vbd *vbd)
+{
+ return bdev_hardsect_size(vbd->bdev);
+}
+
+int vbd_is_active(struct vbd *vbd)
+{
+ return vbd->active;
+}
+
+struct vbd *vbd_create(blkif_t *blkif, blkif_vdev_t handle,
+ blkif_pdev_t pdevice, int readonly)
{
struct vbd *vbd;
+
+ if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
+ {
+ DPRINTK("vbd_create: out of memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ vbd->handle = handle;
+ vbd->readonly = readonly;
+ vbd->type = 0;
+ vbd->active = 0;
+
+ vbd->pdevice = pdevice;
+
+ /* FIXME: Who frees vbd on failure? --RR */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+ vbd->bdev = open_by_devnum(
+ vbd_map_devnum(vbd->pdevice),
+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
+ if ( IS_ERR(vbd->bdev) )
+ {
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+ return ERR_PTR(-ENOENT);
+ }
+
+ if ( (vbd->bdev->bd_disk == NULL) )
+ {
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+ bdev_put(vbd->bdev);
+ return ERR_PTR(-ENOENT);
+ }
+
+ if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
+ vbd->type |= VDISK_CDROM;
+ if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
+ vbd->type |= VDISK_REMOVABLE;
+
+#else
+ if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
+ {
+ DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
+ return ERR_PTR(-ENOENT);
+ }
+#endif
+
+ DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+ handle, blkif->domid);
+ return vbd;
+}
+
+void vbd_activate(blkif_t *blkif, struct vbd *vbd)
+{
rb_node_t **rb_p, *rb_parent = NULL;
- blkif_t *blkif;
- blkif_vdev_t vdevice = create->vdevice;
-
- blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("vbd_create attempted for non-existent blkif (%u,%u)\n",
- create->domid, create->blkif_handle);
- create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
+ struct vbd *i;
+ BUG_ON(vbd_is_active(vbd));
+
+ /* Find where to put it. */
rb_p = &blkif->vbd_rb.rb_node;
while ( *rb_p != NULL )
{
rb_parent = *rb_p;
- vbd = rb_entry(rb_parent, struct vbd, rb);
- if ( vdevice < vbd->vdevice )
+ i = rb_entry(rb_parent, struct vbd, rb);
+ if ( vbd->handle < i->handle )
{
rb_p = &rb_parent->rb_left;
}
- else if ( vdevice > vbd->vdevice )
+ else if ( vbd->handle > i->handle )
{
rb_p = &rb_parent->rb_right;
}
else
{
- DPRINTK("vbd_create attempted for already existing vbd\n");
- create->status = BLKIF_BE_STATUS_VBD_EXISTS;
- return;
+ /* We never create two of same vbd, so not possible. */
+ BUG();
}
}
- if ( unlikely((vbd = kmalloc(sizeof(struct vbd), GFP_KERNEL)) == NULL) )
- {
- DPRINTK("vbd_create: out of memory\n");
- create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
- return;
- }
-
- vbd->vdevice = vdevice;
- vbd->readonly = create->readonly;
- vbd->type = 0;
-
- /* Mask to 16-bit for compatibility with old tools */
- vbd->pdevice = create->pdevice & 0xffff;
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
- vbd->bdev = open_by_devnum(
- vbd_map_devnum(vbd->pdevice),
- vbd->readonly ? FMODE_READ : FMODE_WRITE);
- if ( IS_ERR(vbd->bdev) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
- return;
- }
-
- if ( (vbd->bdev->bd_disk == NULL) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
- bdev_put(vbd->bdev);
- return;
- }
-
- if ( vbd->bdev->bd_disk->flags & GENHD_FL_CD )
- vbd->type |= VDISK_CDROM;
- if ( vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE )
- vbd->type |= VDISK_REMOVABLE;
-
-#else
- if ( (blk_size[MAJOR(vbd->pdevice)] == NULL) || (vbd_sz(vbd) == 0) )
- {
- DPRINTK("vbd_creat: device %08x doesn't exist.\n", vbd->pdevice);
- create->status = BLKIF_BE_STATUS_PHYSDEV_NOT_FOUND;
- return;
- }
-#endif
+ /* Now we're active. */
+ vbd->active = 1;
+ blkif_get(blkif);
spin_lock(&blkif->vbd_lock);
rb_link_node(&vbd->rb, rb_parent, rb_p);
rb_insert_color(&vbd->rb, &blkif->vbd_rb);
spin_unlock(&blkif->vbd_lock);
-
- DPRINTK("Successful creation of vdev=%04x (dom=%u)\n",
- vdevice, create->domid);
- create->status = BLKIF_BE_STATUS_OKAY;
-}
-
-
-void vbd_destroy(blkif_be_vbd_destroy_t *destroy)
-{
- blkif_t *blkif;
- struct vbd *vbd;
- rb_node_t *rb;
- blkif_vdev_t vdevice = destroy->vdevice;
-
- blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
- if ( unlikely(blkif == NULL) )
- {
- DPRINTK("vbd_destroy attempted for non-existent blkif (%u,%u)\n",
- destroy->domid, destroy->blkif_handle);
- destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
- return;
- }
-
- rb = blkif->vbd_rb.rb_node;
- while ( rb != NULL )
- {
- vbd = rb_entry(rb, struct vbd, rb);
- if ( vdevice < vbd->vdevice )
- rb = rb->rb_left;
- else if ( vdevice > vbd->vdevice )
- rb = rb->rb_right;
- else
- goto found;
- }
-
- destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
- return;
-
- found:
- spin_lock(&blkif->vbd_lock);
- rb_erase(rb, &blkif->vbd_rb);
- spin_unlock(&blkif->vbd_lock);
+}
+
+void vbd_free(blkif_t *blkif, struct vbd *vbd)
+{
+ if (vbd_is_active(vbd)) {
+ spin_lock(&blkif->vbd_lock);
+ rb_erase(&vbd->rb, &blkif->vbd_rb);
+ spin_unlock(&blkif->vbd_lock);
+ blkif_put(blkif);
+ }
bdev_put(vbd->bdev);
kfree(vbd);
}
-
void destroy_all_vbds(blkif_t *blkif)
{
@@ -183,73 +174,11 @@
bdev_put(vbd->bdev);
kfree(vbd);
spin_lock(&blkif->vbd_lock);
+ blkif_put(blkif);
}
spin_unlock(&blkif->vbd_lock);
}
-
-
-static void vbd_probe_single(
- blkif_t *blkif, vdisk_t *vbd_info, struct vbd *vbd)
-{
- vbd_info->device = vbd->vdevice;
- vbd_info->info = vbd->type | (vbd->readonly ? VDISK_READONLY : 0);
- vbd_info->capacity = vbd_sz(vbd);
- vbd_info->sector_size = bdev_hardsect_size(vbd->bdev);
-}
-
-
-int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
-{
- int rc = 0, nr_vbds = 0;
- rb_node_t *rb;
-
- spin_lock(&blkif->vbd_lock);
-
- if ( (rb = blkif->vbd_rb.rb_node) == NULL )
- goto out;
-
- new_subtree:
- /* STEP 1. Find least node (it'll be left-most). */
- while ( rb->rb_left != NULL )
- rb = rb->rb_left;
-
- for ( ; ; )
- {
- /* STEP 2. Dealt with left subtree. Now process current node. */
- vbd_probe_single(blkif, &vbd_info[nr_vbds],
- rb_entry(rb, struct vbd, rb));
- if ( ++nr_vbds == max_vbds )
- goto out;
-
- /* STEP 3. Process right subtree, if any. */
- if ( rb->rb_right != NULL )
- {
- rb = rb->rb_right;
- goto new_subtree;
- }
-
- /* STEP 4. Done both subtrees. Head back through ancesstors. */
- for ( ; ; )
- {
- /* We're done when we get back to the root node. */
- if ( rb->rb_parent == NULL )
- goto out;
- /* If we are left of parent, then parent is next to process. */
- if ( rb->rb_parent->rb_left == rb )
- break;
- /* If we are right of parent, then we climb to grandparent. */
- rb = rb->rb_parent;
- }
-
- rb = rb->rb_parent;
- }
-
- out:
- spin_unlock(&blkif->vbd_lock);
- return (rc == 0) ? nr_vbds : rc;
-}
-
int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
{
@@ -264,9 +193,9 @@
while ( rb != NULL )
{
vbd = rb_entry(rb, struct vbd, rb);
- if ( req->dev < vbd->vdevice )
+ if ( req->dev < vbd->handle )
rb = rb->rb_left;
- else if ( req->dev > vbd->vdevice )
+ else if ( req->dev > vbd->handle )
rb = rb->rb_right;
else
goto found;
diff -r 99914b54f7bf -r 81576d3d1ca8 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 19 18:19:28 2005
@@ -53,8 +53,8 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
-#include <asm-xen/ctrl_if.h>
#include <asm-xen/evtchn.h>
+#include <asm-xen/xenbus.h>
#ifdef CONFIG_XEN_BLKDEV_GRANT
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
@@ -65,22 +65,14 @@
/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 1
-#if ENABLE_VBD_UPDATE
-static void vbd_update(void);
-#else
-static void vbd_update(void){};
-#endif
-
#define BLKIF_STATE_CLOSED 0
#define BLKIF_STATE_DISCONNECTED 1
#define BLKIF_STATE_CONNECTED 2
-static int blkif_handle = 0;
static unsigned int blkif_state = BLKIF_STATE_CLOSED;
static unsigned int blkif_evtchn = 0;
-
-static int blkif_control_rsp_valid;
-static blkif_response_t blkif_control_rsp;
+static unsigned int blkif_vbds = 0;
+static unsigned int blkif_vbds_connected = 0;
static blkif_front_ring_t blk_ring;
@@ -92,6 +84,7 @@
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
#define GRANTREF_INVALID (1<<15)
+static int shmem_ref;
#endif
static struct blk_shadow {
@@ -105,7 +98,7 @@
static void kick_pending_request_queues(void);
-int __init xlblk_init(void);
+static int __init xlblk_init(void);
static void blkif_completion(struct blk_shadow *s);
@@ -179,19 +172,6 @@
module_init(xlblk_init);
-#if ENABLE_VBD_UPDATE
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
- static DECLARE_WORK(update_tq, update_vbds_task, NULL);
- schedule_work(&update_tq);
-}
-#endif /* ENABLE_VBD_UPDATE */
-
static struct xlbd_disk_info *head_waiting = NULL;
static void kick_pending_request_queues(void)
{
@@ -221,16 +201,7 @@
int blkif_release(struct inode *inode, struct file *filep)
{
- struct gendisk *gd = inode->i_bdev->bd_disk;
- struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
-
- /*
- * When usage drops to zero it may allow more VBD updates to occur.
- * Update of usage count is protected by a per-device semaphore.
- */
- if ( --di->mi->usage == 0 )
- vbd_update();
-
+ /* FIXME: This is where we can actually free up majors, etc. --RR */
return 0;
}
@@ -301,7 +272,7 @@
ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
BLKIF_OP_READ;
ring_req->sector_number = (blkif_sector_t)req->sector;
- ring_req->device = di->xd_device;
+ ring_req->handle = di->handle;
ring_req->nr_segments = 0;
rq_for_each_bio(bio, req)
@@ -446,10 +417,6 @@
end_that_request_last(req);
break;
- case BLKIF_OP_PROBE:
- memcpy(&blkif_control_rsp, bret, sizeof(*bret));
- blkif_control_rsp_valid = 1;
- break;
default:
BUG();
}
@@ -483,28 +450,6 @@
#define blkif_io_lock io_request_lock
/*============================================================================*/
-#if ENABLE_VBD_UPDATE
-
-/*
- * blkif_update_int/update-vbds_task - handle VBD update events.
- * Schedule a task for keventd to run, which will update the VBDs and perform
- * the corresponding updates to our view of VBD state.
- */
-static void update_vbds_task(void *unused)
-{
- xlvbd_update_vbds();
-}
-
-static void vbd_update(void)
-{
- static struct tq_struct update_tq;
- update_tq.routine = update_vbds_task;
- schedule_task(&update_tq);
-}
-
-#endif /* ENABLE_VBD_UPDATE */
-/*============================================================================*/
-
static void kick_pending_request_queues(void)
{
/* We kick pending request queues if the ring is reasonably empty. */
@@ -757,7 +702,8 @@
char * buffer,
unsigned long sector_number,
unsigned short nr_sectors,
- kdev_t device)
+ kdev_t device,
+ blkif_vdev_t handle)
{
unsigned long buffer_ma = virt_to_bus(buffer);
unsigned long xid;
@@ -871,7 +817,7 @@
req->id = xid;
req->operation = operation;
req->sector_number = (blkif_sector_t)sector_number;
- req->device = device;
+ req->handle = handle;
req->nr_segments = 1;
#ifdef CONFIG_XEN_BLKDEV_GRANT
/* install a grant reference. */
@@ -1047,108 +993,10 @@
/***************************** COMMON CODE *******************************/
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
- unsigned long address)
-{
- int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
- ASSERT( ref != -ENOSPC );
-
- gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
-
- req->frame_and_sects[0] = blkif_fas_from_gref(ref, 0, (PAGE_SIZE/512)-1);
-
- blkif_control_send(req, rsp);
-}
-#endif
-
-void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
-{
- unsigned long flags, id;
- blkif_request_t *req_d;
-
- retry:
- while ( RING_FULL(&blk_ring) )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- spin_lock_irqsave(&blkif_io_lock, flags);
- if ( RING_FULL(&blk_ring) )
- {
- spin_unlock_irqrestore(&blkif_io_lock, flags);
- goto retry;
- }
-
- DISABLE_SCATTERGATHER();
- req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
- *req_d = *req;
-
- id = GET_ID_FROM_FREELIST();
- req_d->id = id;
- blk_shadow[id].request = (unsigned long)req;
-
- pickle_request(&blk_shadow[id], req);
-
- blk_ring.req_prod_pvt++;
- flush_requests();
-
- spin_unlock_irqrestore(&blkif_io_lock, flags);
-
- while ( !blkif_control_rsp_valid )
- {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(1);
- }
-
- memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
- blkif_control_rsp_valid = 0;
-}
-
-
-/* Send a driver status notification to the domain controller. */
-static void send_driver_status(int ok)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
- .length = sizeof(blkif_fe_driver_status_t),
- };
- blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
-
- msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
-/* Tell the controller to bring up the interface. */
-static void blkif_send_interface_connect(void)
-{
- ctrl_msg_t cmsg = {
- .type = CMSG_BLKIF_FE,
- .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
- .length = sizeof(blkif_fe_interface_connect_t),
- };
- blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
-
- msg->handle = 0;
- msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
-
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- msg->shmem_ref = gnttab_claim_grant_reference( &gref_head, gref_terminal );
- ASSERT( msg->shmem_ref != -ENOSPC );
- gnttab_grant_foreign_access_ref ( msg->shmem_ref , rdomid, msg->shmem_frame, 0 );
-#endif
-
- ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
-}
-
static void blkif_free(void)
{
/* Prevent new requests being issued until we fix things up. */
spin_lock_irq(&blkif_io_lock);
- recovery = 1;
blkif_state = BLKIF_STATE_DISCONNECTED;
spin_unlock_irq(&blkif_io_lock);
@@ -1160,31 +1008,6 @@
}
unbind_evtchn_from_irqhandler(blkif_evtchn, NULL);
blkif_evtchn = 0;
-}
-
-static void blkif_close(void)
-{
-}
-
-/* Move from CLOSED to DISCONNECTED state. */
-static void blkif_disconnect(void)
-{
- blkif_sring_t *sring;
-
- if ( blk_ring.sring != NULL )
- free_page((unsigned long)blk_ring.sring);
-
- sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
- SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
- blkif_state = BLKIF_STATE_DISCONNECTED;
- blkif_send_interface_connect();
-}
-
-static void blkif_reset(void)
-{
- blkif_free();
- blkif_disconnect();
}
static void blkif_recover(void)
@@ -1257,11 +1080,14 @@
blkif_state = BLKIF_STATE_CONNECTED;
}
-static void blkif_connect(blkif_fe_interface_status_t *status)
+static void blkif_connect(u16 evtchn, domid_t domid)
{
int err = 0;
- blkif_evtchn = status->evtchn;
+ blkif_evtchn = evtchn;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ rdomid = domid;
+#endif
err = bind_evtchn_to_irqhandler(
blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
@@ -1270,142 +1096,318 @@
WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
return;
}
-
- if ( recovery )
- {
- blkif_recover();
- }
- else
- {
- /* Transition to connected in case we need to do
- * a partition probe on a whole disk. */
- blkif_state = BLKIF_STATE_CONNECTED;
-
- /* Probe for discs attached to the interface. */
- xlvbd_init();
- }
-
- /* Kick pending requests. */
- spin_lock_irq(&blkif_io_lock);
- kick_pending_request_queues();
- spin_unlock_irq(&blkif_io_lock);
-}
-
-static void unexpected(blkif_fe_interface_status_t *status)
-{
- DPRINTK(" Unexpected blkif status %u in state %u\n",
- status->status, blkif_state);
-}
-
-static void blkif_status(blkif_fe_interface_status_t *status)
-{
+}
+
+
+static struct xenbus_device_id blkfront_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+struct blkfront_info
+{
+ /* We watch the backend */
+ struct xenbus_watch watch;
+ int vdevice;
+ u16 handle;
+ int connected;
+ struct xenbus_device *dev;
+ char *backend;
+};
+
+static void watch_for_status(struct xenbus_watch *watch, const char *node)
+{
+ struct blkfront_info *info;
+ unsigned int binfo;
+ unsigned long sectors, sector_size;
+ int err;
+
+ info = container_of(watch, struct blkfront_info, watch);
+ node += strlen(watch->node);
+
+ /* FIXME: clean up when error on the other end. */
+ if (info->connected)
+ return;
+
+ err = xenbus_gather(watch->node,
+ "sectors", "%lu", &sectors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", &sector_size,
+ NULL);
+
+ if (err)
+ xenbus_dev_error(info->dev, err, "reading backend fields");
+ else {
+ xlvbd_add(sectors, info->vdevice, info->handle, binfo,
+ sector_size);
+ info->connected = 1;
+
+ /* First to connect? blkif is now connected. */
+ if (blkif_vbds_connected++ == 0)
+ blkif_state = BLKIF_STATE_CONNECTED;
+
+ xenbus_dev_ok(info->dev);
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ kick_pending_request_queues();
+ spin_unlock_irq(&blkif_io_lock);
+ }
+}
+
+/*
+ * Allocate and publish the shared block ring for backend_id.
+ *
+ * Gets one free page, initialises the shared and front rings on it, and
+ * (with CONFIG_XEN_BLKDEV_GRANT) claims a grant reference and grants
+ * backend_id access to the ring page.  It then allocates an unbound event
+ * channel for backend_id and hands the port to blkif_connect().
+ *
+ * Returns 0 on success, -ENOMEM if the page cannot be allocated, or the
+ * error returned by the event-channel hypercall.  Failures are also
+ * reported through xenbus_dev_error().
+ *
+ * NOTE(review): when the event-channel allocation fails the ring page is
+ * freed here, but the grant reference claimed above is not released in
+ * this function -- confirm whether that is handled elsewhere.
+ */
+static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id)
+{
+ blkif_sring_t *sring;
+ evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
+ int err;
+
+ sring = (void *)__get_free_page(GFP_KERNEL);
+ if (!sring) {
+ xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE);
+
 #ifdef CONFIG_XEN_BLKDEV_GRANT
- rdomid = status->domid; /* need to set rdomid early */
-#endif
-
- if ( status->handle != blkif_handle )
- {
- WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
- unexpected(status);
- return;
- }
-
- switch ( status->status )
- {
- case BLKIF_INTERFACE_STATUS_CLOSED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_close();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_DISCONNECTED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- blkif_disconnect();
- break;
- case BLKIF_STATE_DISCONNECTED:
- case BLKIF_STATE_CONNECTED:
- /* unexpected(status); */ /* occurs during suspend/resume */
- blkif_reset();
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CONNECTED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- unexpected(status);
- blkif_disconnect();
- blkif_connect(status);
- break;
- case BLKIF_STATE_DISCONNECTED:
- blkif_connect(status);
- break;
- case BLKIF_STATE_CONNECTED:
- unexpected(status);
- blkif_connect(status);
- break;
- }
- break;
-
- case BLKIF_INTERFACE_STATUS_CHANGED:
- switch ( blkif_state )
- {
- case BLKIF_STATE_CLOSED:
- case BLKIF_STATE_DISCONNECTED:
- unexpected(status);
- break;
- case BLKIF_STATE_CONNECTED:
- vbd_update();
- break;
- }
- break;
-
- default:
- WPRINTK(" Invalid blkif status: %d\n", status->status);
- break;
- }
-}
-
-
-static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
-{
- switch ( msg->subtype )
- {
- case CMSG_BLKIF_FE_INTERFACE_STATUS:
- blkif_status((blkif_fe_interface_status_t *)
- &msg->msg[0]);
- break;
- default:
- msg->length = 0;
- break;
- }
-
- ctrl_if_send_response(msg);
-}
-
-int wait_for_blkif(void)
+ shmem_ref = gnttab_claim_grant_reference(&gref_head,
+ gref_terminal);
+ ASSERT(shmem_ref != -ENOSPC);
+ gnttab_grant_foreign_access_ref(shmem_ref,
+ backend_id,
+ virt_to_mfn(blk_ring.sring),
+ 0);
+#endif
+
+ /* Ask the hypervisor for an unbound event channel the backend can bind to. */
+ op.u.alloc_unbound.dom = backend_id;
+ err = HYPERVISOR_event_channel_op(&op);
+ if (err) {
+ free_page((unsigned long)blk_ring.sring);
+ blk_ring.sring = 0;
+ xenbus_dev_error(dev, err, "allocating event channel");
+ return err;
+ }
+ blkif_connect(op.u.alloc_unbound.port, backend_id);
+ return 0;
+}
+
+/*
+ * Common code used when first setting up, and when resuming.
+ *
+ * Reads the "backend" path and "backend-id" from the frontend node, sets
+ * up the shared ring if no device exists yet (blkif_vbds == 0), then, in
+ * one transaction, writes the ring reference and event channel and
+ * registers a watch on the backend path.
+ *
+ * On success returns 0 and info->backend / info->watch.node own the
+ * backend path string.  On failure returns a negative errno, the string is
+ * freed, no watch is left registered and info->backend is NULL.
+ */
+static int talk_to_backend(struct xenbus_device *dev,
+			   struct blkfront_info *info)
+{
+	char *backend;
+	const char *message;
+	int err, backend_id;
+
+	backend = xenbus_read(dev->nodename, "backend", NULL);
+	if (IS_ERR(backend)) {
+		err = PTR_ERR(backend);
+		if (err == -ENOENT)
+			goto out;
+		xenbus_dev_error(dev, err, "reading %s/backend",
+				 dev->nodename);
+		goto out;
+	}
+	if (strlen(backend) == 0) {
+		err = -ENOENT;
+		goto free_backend;
+	}
+
+	/* FIXME: This driver can't handle backends on different
+	 * domains.  Check and fail gracefully. */
+	err = xenbus_scanf(dev->nodename, "backend-id", "%i", &backend_id);
+	if (err == -ENOENT)
+		goto free_backend;
+	if (err < 0) {
+		xenbus_dev_error(dev, err, "reading %s/backend-id",
+				 dev->nodename);
+		goto free_backend;
+	}
+
+	/* First device?  We create shared ring, alloc event channel. */
+	if (blkif_vbds == 0) {
+		err = setup_blkring(dev, backend_id);
+		if (err)
+			goto free_backend;
+	}
+
+	err = xenbus_transaction_start(dev->nodename);
+	if (err) {
+		xenbus_dev_error(dev, err, "starting transaction");
+		goto destroy_blkring;
+	}
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+	err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref);
+	if (err) {
+		message = "writing grant-id";
+		goto abort_transaction;
+	}
+#else
+	err = xenbus_printf(dev->nodename, "shared-frame", "%lu",
+			    virt_to_mfn(blk_ring.sring));
+	if (err) {
+		message = "writing shared-frame";
+		goto abort_transaction;
+	}
+#endif
+	err = xenbus_printf(dev->nodename,
+			    "event-channel", "%u", blkif_evtchn);
+	if (err) {
+		message = "writing event-channel";
+		goto abort_transaction;
+	}
+
+	info->watch.node = info->backend = backend;
+	info->watch.callback = watch_for_status;
+
+	err = register_xenbus_watch(&info->watch);
+	if (err) {
+		message = "registering watch on backend";
+		goto abort_transaction;
+	}
+
+	err = xenbus_transaction_end(0);
+	if (err) {
+		/*
+		 * The watch is live and its node string is about to be
+		 * freed below, so take it down before unwinding.
+		 */
+		unregister_xenbus_watch(&info->watch);
+		xenbus_dev_error(dev, err, "completing transaction");
+		goto destroy_blkring;
+	}
+	return 0;
+
+abort_transaction:
+	xenbus_transaction_end(1);
+	/* Have to do this *outside* transaction. */
+	xenbus_dev_error(dev, err, "%s", message);
+destroy_blkring:
+	if (blkif_vbds == 0)
+		blkif_free();
+free_backend:
+	/*
+	 * Never leave info pointing at the freed string: on the resume path
+	 * info outlives this call and blkfront_remove() would otherwise
+	 * unregister a dead watch and kfree() the string a second time.
+	 */
+	info->watch.node = info->backend = NULL;
+	kfree(backend);
+out:
+	printk("%s:%u = %i\n", __FILE__, __LINE__, err);
+	return err;
+}
+
+/*
+ * Setup supplies the backend dir, virtual device.
+ *
+ * We place an event channel and shared frame entries.
+ * We watch backend to wait if it's ok.
+ *
+ * Reads "virtual-device" from the frontend node, allocates the per-device
+ * blkfront_info, talks to the backend and then polls the backend state
+ * once.  Returns 0 on success or a negative errno; on failure nothing is
+ * left allocated and dev->data is NULL.
+ */
+static int blkfront_probe(struct xenbus_device *dev,
+			  const struct xenbus_device_id *id)
+{
+	int err;
+	struct blkfront_info *info;
+	int vdevice;
+
+	/* FIXME: Use dynamic device id if this is not set. */
+	err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
+	if (err == -ENOENT)
+		return err;
+	if (err < 0) {
+		xenbus_dev_error(dev, err, "reading virtual-device");
+		return err;
+	}
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		/*
+		 * err still holds xenbus_scanf()'s non-negative result here,
+		 * so the failure has to be named explicitly.
+		 */
+		err = -ENOMEM;
+		xenbus_dev_error(dev, err, "allocating info structure");
+		return err;
+	}
+	info->dev = dev;
+	info->vdevice = vdevice;
+	info->connected = 0;
+	/* kmalloc() does not zero; blkfront_remove() tests this pointer. */
+	info->backend = NULL;
+	/* Front end dir is a number, which is used as the id. */
+	info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+	dev->data = info;
+
+	err = talk_to_backend(dev, info);
+	if (err) {
+		/* Don't leave the device pointing at freed state. */
+		dev->data = NULL;
+		kfree(info);
+		return err;
+	}
+
+	/* Call once in case entries already there. */
+	watch_for_status(&info->watch, info->watch.node);
+	blkif_vbds++;
+	return 0;
+}
+
+/*
+ * Remove hook: drop the backend watch (if a backend path is held), delete
+ * the disk if it was connected, free the per-device state, and call
+ * blkif_free() when the last device goes away.  Always returns 0.
+ */
+static int blkfront_remove(struct xenbus_device *dev)
+{
+	struct blkfront_info *priv = dev->data;
+
+	if (priv->backend != NULL)
+		unregister_xenbus_watch(&priv->watch);
+
+	if (priv->connected != 0) {
+		xlvbd_del(priv->handle);
+		blkif_vbds_connected -= 1;
+	}
+
+	kfree(priv->backend);
+	kfree(priv);
+
+	blkif_vbds -= 1;
+	if (blkif_vbds != 0)
+		return 0;
+
+	/* That was the last device. */
+	blkif_free();
+	return 0;
+}
+
+/*
+ * Suspend hook: unregister the backend watch and release the backend path.
+ * When the device count drops to zero, set the recovery flag and call
+ * blkif_free().  Always returns 0.
+ */
+static int blkfront_suspend(struct xenbus_device *dev)
+{
+	struct blkfront_info *priv = dev->data;
+
+	unregister_xenbus_watch(&priv->watch);
+
+	kfree(priv->backend);
+	priv->backend = NULL;
+
+	blkif_vbds -= 1;
+	if (blkif_vbds != 0)
+		return 0;
+
+	/* Last device suspended. */
+	recovery = 1;
+	blkif_free();
+	return 0;
+}
+
+/*
+ * Resume hook: redo the backend handshake.  On success the device count is
+ * bumped, and the first device to come back triggers blkif_recover().
+ * Returns talk_to_backend()'s result.
+ */
+static int blkfront_resume(struct xenbus_device *dev)
+{
+	struct blkfront_info *priv = dev->data;
+	int rc, was_first;
+
+	/* FIXME: Check geometry hasn't changed here... */
+	rc = talk_to_backend(dev, priv);
+	if (rc)
+		return rc;
+
+	/* Count this device before recovering, as the original ordering did. */
+	was_first = (blkif_vbds == 0);
+	blkif_vbds += 1;
+	if (was_first)
+		blkif_recover();
+
+	return rc;
+}
+
+/* xenbus driver description for "vbd" devices; registered by init_blk_xenbus(). */
+static struct xenbus_driver blkfront = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkfront_ids,
+ .probe = blkfront_probe,
+ .remove = blkfront_remove,
+ .resume = blkfront_resume,
+ .suspend = blkfront_suspend,
+};
+
+/*
+ * Register the "vbd" frontend driver with xenbus.  The function returns
+ * void, so a registration failure cannot be propagated; it is logged
+ * instead of being silently dropped.
+ */
+static void __init init_blk_xenbus(void)
+{
+	int err = xenbus_register_device(&blkfront);
+
+	if (err)
+		printk(KERN_WARNING
+		       "blkfront: xenbus driver registration failed (%i)\n",
+		       err);
+}
+
+static int wait_for_blkif(void)
{
int err = 0;
int i;
- send_driver_status(1);
/*
* We should read 'nr_interfaces' from response message and wait
* for notifications before proceeding. For now we assume that we
* will be notified of exactly one interface.
*/
- for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
+ for ( i=0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++ )
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
@@ -1419,7 +1421,7 @@
return err;
}
-int __init xlblk_init(void)
+static int __init xlblk_init(void)
{
int i;
@@ -1443,27 +1445,11 @@
blk_shadow[i].req.id = i+1;
blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
- (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
- CALLBACK_IN_BLOCKING_CONTEXT);
+ init_blk_xenbus();
wait_for_blkif();
return 0;
-}
-
-void blkdev_suspend(void)
-{
-}
-
-void blkdev_resume(void)
-{
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- int i, j;
- for ( i = 0; i < BLK_RING_SIZE; i++ )
- for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
- blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
-#endif
- send_driver_status(1);
}
static void blkif_completion(struct blk_shadow *s)
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h Fri Aug 19 18:19:28 2005
@@ -100,6 +100,7 @@
struct xlbd_disk_info {
int xd_device;
+ blkif_vdev_t handle;
struct xlbd_major_info *mi;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
struct xlbd_disk_info *next_waiting;
@@ -119,17 +120,10 @@
unsigned command, unsigned long argument);
extern int blkif_check(dev_t dev);
extern int blkif_revalidate(dev_t dev);
-extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-extern void blkif_control_probe_send(
- blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
-#endif
extern void do_blkif_request (request_queue_t *rq);
-extern void xlvbd_update_vbds(void);
-
/* Virtual block-device subsystem. */
-extern int xlvbd_init(void);
-extern void xlvbd_cleanup(void);
-
+int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
+ u16 info, u16 sector_size);
+void xlvbd_del(blkif_vdev_t handle);
#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c Fri Aug 19 18:19:28 2005
@@ -46,8 +46,9 @@
struct lvdisk
{
blkif_sector_t capacity; /* 0: Size in terms of 512-byte sectors. */
- blkif_vdev_t device; /* 8: Device number (opaque 16 bit value). */
- u16 info;
+ blkif_vdev_t handle; /* 8: Device number (opaque 16 bit value). */
+ u16 info;
+ dev_t dev;
struct list_head list;
};
@@ -85,7 +86,7 @@
/* Information about our VBDs. */
#define MAX_VBDS 64
-struct list_head vbds_list;
+static LIST_HEAD(vbds_list);
#define MAJOR_XEN(dev) ((dev)>>8)
#define MINOR_XEN(dev) ((dev) & 0xff)
@@ -116,49 +117,6 @@
{
list_del(&disk->list);
kfree(disk);
-}
-
-static vdisk_t *xlvbd_probe(int *ret)
-{
- blkif_response_t rsp;
- blkif_request_t req;
- vdisk_t *disk_info = NULL;
- unsigned long buf;
- int nr;
-
- buf = __get_free_page(GFP_KERNEL);
- if ((void *)buf == NULL)
- goto out;
-
- memset(&req, 0, sizeof(req));
- req.operation = BLKIF_OP_PROBE;
- req.nr_segments = 1;
-#ifdef CONFIG_XEN_BLKDEV_GRANT
- blkif_control_probe_send(&req, &rsp,
- (unsigned long)(virt_to_machine(buf)));
-#else
- req.frame_and_sects[0] = blkif_fas(virt_to_machine(buf), 0,
(PAGE_SIZE/512)-1);
-
- blkif_control_send(&req, &rsp);
-#endif
- if ( rsp.status <= 0 ) {
- WPRINTK("Could not probe disks (%d)\n", rsp.status);
- goto out;
- }
- nr = rsp.status;
- if ( nr > MAX_VBDS )
- nr = MAX_VBDS;
-
- disk_info = kmalloc(nr * sizeof(vdisk_t), GFP_KERNEL);
- if (disk_info != NULL)
- memcpy(disk_info, (void *) buf, nr * sizeof(vdisk_t));
-
- if (ret != NULL)
- *ret = nr;
-
-out:
- free_page(buf);
- return disk_info;
}
static struct xlbd_major_info *xlbd_alloc_major_info(
@@ -189,6 +147,7 @@
break;
}
+ printk("Registering block device major %i\n", ptr->major);
if (register_blkdev(ptr->major, ptr->type->devname)) {
WPRINTK("can't get major %d with name %s\n",
ptr->major, ptr->type->devname);
@@ -231,7 +190,7 @@
xlbd_alloc_major_info(major, minor, index));
}
-static int xlvbd_init_blk_queue(struct gendisk *gd, vdisk_t *disk)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
request_queue_t *rq;
@@ -242,7 +201,7 @@
elevator_init(rq, "noop");
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_hardsect_size(rq, disk->sector_size);
+ blk_queue_hardsect_size(rq, sector_size);
blk_queue_max_sectors(rq, 512);
/* Each segment in a request is up to an aligned page in size. */
@@ -261,8 +220,9 @@
return 0;
}
-struct gendisk *xlvbd_alloc_gendisk(
- struct xlbd_major_info *mi, int minor, vdisk_t *disk)
+static struct gendisk *xlvbd_alloc_gendisk(
+ struct xlbd_major_info *mi, int minor, blkif_sector_t capacity,
+ int device, blkif_vdev_t handle, u16 info, u16 sector_size)
{
struct gendisk *gd;
struct xlbd_disk_info *di;
@@ -273,7 +233,8 @@
return NULL;
memset(di, 0, sizeof(*di));
di->mi = mi;
- di->xd_device = disk->device;
+ di->xd_device = device;
+ di->handle = handle;
if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
nr_minors = 1 << mi->type->partn_shift;
@@ -296,22 +257,22 @@
gd->first_minor = minor;
gd->fops = &xlvbd_block_fops;
gd->private_data = di;
- set_capacity(gd, disk->capacity);
-
- if (xlvbd_init_blk_queue(gd, disk)) {
+ set_capacity(gd, capacity);
+
+ if (xlvbd_init_blk_queue(gd, sector_size)) {
del_gendisk(gd);
goto out;
}
di->rq = gd->queue;
- if (disk->info & VDISK_READONLY)
+ if (info & VDISK_READONLY)
set_disk_ro(gd, 1);
- if (disk->info & VDISK_REMOVABLE)
+ if (info & VDISK_REMOVABLE)
gd->flags |= GENHD_FL_REMOVABLE;
- if (disk->info & VDISK_CDROM)
+ if (info & VDISK_CDROM)
gd->flags |= GENHD_FL_CD;
add_disk(gd);
@@ -323,38 +284,36 @@
return NULL;
}
-static int xlvbd_device_add(struct list_head *list, vdisk_t *disk)
+int xlvbd_add(blkif_sector_t capacity, int device, blkif_vdev_t handle,
+ u16 info, u16 sector_size)
{
struct lvdisk *new;
- int minor;
- dev_t device;
struct block_device *bd;
struct gendisk *gd;
struct xlbd_major_info *mi;
- mi = xlbd_get_major_info(disk->device);
+ mi = xlbd_get_major_info(device);
if (mi == NULL)
return -EPERM;
new = xlvbd_device_alloc();
if (new == NULL)
- return -1;
- new->capacity = disk->capacity;
- new->device = disk->device;
- new->info = disk->info;
-
- minor = MINOR_XEN(disk->device);
- device = MKDEV(mi->major, minor);
-
- bd = bdget(device);
+ return -ENOMEM;
+ new->capacity = capacity;
+ new->info = info;
+ new->handle = handle;
+ new->dev = MKDEV(MAJOR_XEN(device), MINOR_XEN(device));
+
+ bd = bdget(new->dev);
if (bd == NULL)
goto out;
- gd = xlvbd_alloc_gendisk(mi, minor, disk);
+ gd = xlvbd_alloc_gendisk(mi, MINOR_XEN(device), capacity, device, handle,
+ info, sector_size);
if (gd == NULL)
goto out_bd;
- list_add(&new->list, list);
+ list_add(&new->list, &vbds_list);
out_bd:
bdput(bd);
out:
@@ -363,27 +322,26 @@
static int xlvbd_device_del(struct lvdisk *disk)
{
- dev_t device;
struct block_device *bd;
struct gendisk *gd;
struct xlbd_disk_info *di;
int ret = 0, unused;
request_queue_t *rq;
- device = MKDEV(MAJOR_XEN(disk->device), MINOR_XEN(disk->device));
-
- bd = bdget(device);
+ bd = bdget(disk->dev);
if (bd == NULL)
return -1;
- gd = get_gendisk(device, &unused);
+ gd = get_gendisk(disk->dev, &unused);
di = gd->private_data;
+#if 0 /* This is wrong: hda and hdb share same major, for example. */
if (di->mi->usage != 0) {
- WPRINTK("disk removal failed: used [dev=%x]\n", device);
+ WPRINTK("disk removal failed: used [dev=%x]\n", disk->dev);
ret = -1;
goto out;
}
+#endif
rq = gd->queue;
del_gendisk(gd);
@@ -391,110 +349,19 @@
blk_cleanup_queue(rq);
xlvbd_device_free(disk);
-out:
bdput(bd);
return ret;
}
-static int xlvbd_device_update(struct lvdisk *ldisk, vdisk_t *disk)
-{
- dev_t device;
- struct block_device *bd;
- struct gendisk *gd;
- int unused;
-
- if ((ldisk->capacity == disk->capacity) && (ldisk->info == disk->info))
- return 0;
-
- device = MKDEV(MAJOR_XEN(ldisk->device), MINOR_XEN(ldisk->device));
-
- bd = bdget(device);
- if (bd == NULL)
- return -1;
-
- gd = get_gendisk(device, &unused);
- set_capacity(gd, disk->capacity);
- ldisk->capacity = disk->capacity;
-
- bdput(bd);
-
- return 0;
-}
-
-void xlvbd_refresh(void)
-{
- vdisk_t *newdisks;
- struct list_head *tmp, *tmp2;
- struct lvdisk *disk;
- int i, nr;
-
- newdisks = xlvbd_probe(&nr);
- if (newdisks == NULL) {
- WPRINTK("failed to probe\n");
- return;
- }
-
- i = 0;
- list_for_each_safe(tmp, tmp2, &vbds_list) {
- disk = list_entry(tmp, struct lvdisk, list);
-
- for (i = 0; i < nr; i++) {
- if ( !newdisks[i].device )
- continue;
- if ( disk->device == newdisks[i].device ) {
- xlvbd_device_update(disk, &newdisks[i]);
- newdisks[i].device = 0;
- break;
- }
- }
- if (i == nr) {
- xlvbd_device_del(disk);
- newdisks[i].device = 0;
- }
- }
- for (i = 0; i < nr; i++)
- if ( newdisks[i].device )
- xlvbd_device_add(&vbds_list, &newdisks[i]);
- kfree(newdisks);
-}
-
-/*
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
- * state. The VBDs need to be updated in this way when the domain is
- * initialised and also each time we receive an XLBLK_UPDATE event.
- */
-void xlvbd_update_vbds(void)
-{
- xlvbd_refresh();
-}
-
-/*
- * Set up all the linux device goop for the virtual block devices
- * (vbd's) that we know about. Note that although from the backend
- * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
- * number, the domain creation tools conventionally allocate these
- * numbers to correspond to those used by 'real' linux -- this is just
- * for convenience as it means e.g. that the same /etc/fstab can be
- * used when booting with or without Xen.
- */
-int xlvbd_init(void)
-{
- int i, nr;
- vdisk_t *disks;
-
- INIT_LIST_HEAD(&vbds_list);
-
- memset(major_info, 0, sizeof(major_info));
-
- disks = xlvbd_probe(&nr);
- if (disks == NULL) {
- WPRINTK("failed to probe\n");
- return -1;
- }
-
- for (i = 0; i < nr; i++)
- xlvbd_device_add(&vbds_list, &disks[i]);
-
- kfree(disks);
- return 0;
-}
+/*
+ * Delete the first disk on vbds_list whose handle matches.  An unknown
+ * handle is a caller bug and trips BUG().
+ */
+void xlvbd_del(blkif_vdev_t handle)
+{
+	struct lvdisk *disk;
+	struct lvdisk *match = NULL;
+
+	list_for_each_entry(disk, &vbds_list, list) {
+		if (disk->handle != handle)
+			continue;
+		match = disk;
+		break;
+	}
+
+	if (match == NULL)
+		BUG();
+
+	xlvbd_device_del(match);
+}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.h Fri Aug 19 18:19:28 2005
@@ -87,7 +87,7 @@
struct work_struct work;
#ifdef CONFIG_XEN_BLKDEV_GRANT
u16 shmem_handle;
- memory_t shmem_vaddr;
+ unsigned long shmem_vaddr;
grant_ref_t shmem_ref;
#endif
} blkif_t;
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Thu Aug
18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c Fri Aug
19 18:19:28 2005
@@ -320,7 +320,7 @@
};
blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = 0;
- msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT;
+ msg->shmem_frame = virt_to_mfn(blktap_be_ring.sring);
ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/netback/common.h
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Aug 18 18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Fri Aug 19 18:19:28 2005
@@ -49,13 +49,13 @@
unsigned long tx_shmem_frame;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
u16 tx_shmem_handle;
- memory_t tx_shmem_vaddr;
+ unsigned long tx_shmem_vaddr;
grant_ref_t tx_shmem_ref;
#endif
unsigned long rx_shmem_frame;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
u16 rx_shmem_handle;
- memory_t rx_shmem_vaddr;
+ unsigned long rx_shmem_vaddr;
grant_ref_t rx_shmem_ref;
#endif
unsigned int evtchn;
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/netback/netback.c
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Aug 19
18:19:28 2005
@@ -43,7 +43,7 @@
static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
- memory_t addr,
+ unsigned long addr,
u16 size,
u16 csum_valid);
@@ -251,7 +251,7 @@
#else
struct mmuext_op *mmuext;
#endif
- unsigned long vdata, mdata, new_mfn;
+ unsigned long vdata, old_mfn, new_mfn;
struct sk_buff_head rxq;
struct sk_buff *skb;
u16 notify_list[NETIF_RX_RING_SIZE];
@@ -271,7 +271,7 @@
{
netif = netdev_priv(skb->dev);
vdata = (unsigned long)skb->data;
- mdata = virt_to_machine(vdata);
+ old_mfn = virt_to_mfn(vdata);
/* Memory squeeze? Back off for an arbitrary while. */
if ( (new_mfn = alloc_mfn()) == 0 )
@@ -293,7 +293,7 @@
mcl++;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- gop->mfn = mdata >> PAGE_SHIFT;
+ gop->mfn = old_mfn;
gop->domid = netif->domid;
gop->handle = netif->rx->ring[
MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
@@ -308,7 +308,7 @@
mcl++;
mmuext->cmd = MMUEXT_REASSIGN_PAGE;
- mmuext->mfn = mdata >> PAGE_SHIFT;
+ mmuext->mfn = old_mfn;
mmuext++;
#endif
mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
@@ -318,7 +318,7 @@
__skb_queue_tail(&rxq, skb);
#ifdef DEBUG_GRANT
- dump_packet('a', mdata, vdata);
+ dump_packet('a', old_mfn, vdata);
#endif
/* Filled the batch queue? */
if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
@@ -345,10 +345,8 @@
mcl = rx_mcl;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_donate,
- grant_rx_op, gop - grant_rx_op))) {
- BUG();
- }
+ BUG_ON(HYPERVISOR_grant_table_op(
+ GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
gop = grant_rx_op;
#else
mmuext = rx_mmuext;
@@ -361,10 +359,9 @@
/* Rederive the machine addresses. */
new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- mdata = (unsigned long)skb->data & ~PAGE_MASK;
-#else
- mdata = ((mmuext[0].mfn << PAGE_SHIFT) |
- ((unsigned long)skb->data & ~PAGE_MASK));
+ old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
+#else
+ old_mfn = mmuext[0].mfn;
#endif
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
@@ -379,18 +376,20 @@
/* Check the reassignment error code. */
status = NETIF_RSP_OKAY;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
- BUG_ON(gop->status != 0);
+ BUG_ON(gop->status != 0); /* XXX */
#else
if ( unlikely(mcl[1].result != 0) )
{
DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
- free_mfn(mdata >> PAGE_SHIFT);
+ free_mfn(old_mfn);
status = NETIF_RSP_ERROR;
}
#endif
evtchn = netif->evtchn;
id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
- if ( make_rx_response(netif, id, status, mdata,
+ if ( make_rx_response(netif, id, status,
+ (old_mfn << PAGE_SHIFT) | /* XXX */
+ ((unsigned long)skb->data & ~PAGE_MASK),
size, skb->proto_csum_valid) &&
(rx_notify[evtchn] == 0) )
{
@@ -888,7 +887,7 @@
static int make_rx_response(netif_t *netif,
u16 id,
s8 st,
- memory_t addr,
+ unsigned long addr,
u16 size,
u16 csum_valid)
{
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Aug 19
18:19:28 2005
@@ -448,11 +448,10 @@
}
grant_rx_ref[id] = ref;
gnttab_grant_foreign_transfer_ref(ref, rdomid,
- virt_to_machine(
- skb->head) >> PAGE_SHIFT);
+ virt_to_mfn(skb->head));
np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
#endif
- rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
+ rx_pfn_array[i] = virt_to_mfn(skb->head);
/* Remove this page from pseudo phys map before passing back to Xen. */
phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
@@ -543,13 +542,14 @@
printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
BUG();
}
- mfn = virt_to_machine(skb->data) >> PAGE_SHIFT;
+ mfn = virt_to_mfn(skb->data);
gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
- tx->addr = (ref << PAGE_SHIFT) | ((unsigned long)skb->data & ~PAGE_MASK);
+ tx->addr = ref << PAGE_SHIFT;
grant_tx_ref[id] = ref;
#else
- tx->addr = virt_to_machine(skb->data);
-#endif
+ tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+ tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
tx->size = skb->len;
tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
@@ -720,7 +720,7 @@
while ((skb = __skb_dequeue(&rxq)) != NULL) {
#ifdef GRANT_DEBUG
printk(KERN_ALERT "#### rx_poll dequeue vdata=%p mfn=%lu\n",
- skb->data, virt_to_machine(skb->data)>>PAGE_SHIFT);
+ skb->data, virt_to_mfn(skb->data));
dump_packet('d', skb->data, (unsigned long)skb->data);
#endif
/*
@@ -854,18 +854,23 @@
* interface has been down.
*/
for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
- if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
- struct sk_buff *skb = np->tx_skbs[i];
-
- tx = &np->tx->ring[requeue_idx++].req;
-
- tx->id = i;
- tx->addr = virt_to_machine(skb->data);
- tx->size = skb->len;
-
- np->stats.tx_bytes += skb->len;
- np->stats.tx_packets++;
- }
+ if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
+ struct sk_buff *skb = np->tx_skbs[i];
+
+ tx = &np->tx->ring[requeue_idx++].req;
+
+ tx->id = i;
+#ifdef CONFIG_XEN_NETDEV_GRANT_TX
+ tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
+#else
+ tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
+#endif
+ tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
+ tx->size = skb->len;
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+ }
}
wmb();
np->tx->req_prod = requeue_idx;
@@ -922,7 +927,7 @@
netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
msg->handle = np->handle;
- msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
+ msg->tx_shmem_frame = virt_to_mfn(np->tx);
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
msg->tx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_tx_head,
gref_tx_terminal);
@@ -934,7 +939,7 @@
msg->tx_shmem_frame, 0);
#endif
- msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);
+ msg->rx_shmem_frame = virt_to_mfn(np->rx);
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head,
gref_rx_terminal);
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Aug 19
18:19:28 2005
@@ -657,8 +657,8 @@
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
FOREIGN_FRAME((buffer_mach + offset) >> PAGE_SHIFT);
- ASSERT(virt_to_machine(MMAP_VADDR(pending_idx, i))
- == buffer_mach + i << PAGE_SHIFT);
+ ASSERT(virt_to_mfn(MMAP_VADDR(pending_idx, i))
+ == ((buffer_mach >> PAGE_SHIFT) + i));
}
if ( req->pipe_type == 0 && req->num_iso > 0 ) /* Maybe schedule ISO... */
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/usbfront/usbfront.c Fri Aug 19
18:19:28 2005
@@ -195,7 +195,7 @@
}
urb_priv->schedule = schedule;
- req->iso_schedule = virt_to_machine(schedule);
+ req->iso_schedule = virt_to_mfn(schedule) << PAGE_SHIFT;
return 0;
}
@@ -212,7 +212,7 @@
#if DEBUG
printk(KERN_DEBUG
"usbif = %p, req_prod = %d (@ 0x%lx), resp_prod = %d, resp_cons
= %d\n",
- usbif, usbif->req_prod, virt_to_machine(&usbif->req_prod),
+ usbif, usbif->req_prod, virt_to_mfn(&usbif->req_prod),
usbif->resp_prod, xhci->usb_resp_cons);
#endif
@@ -232,7 +232,7 @@
req->operation = USBIF_OP_IO;
req->port = 0; /* We don't care what the port is. */
req->id = (unsigned long) urb->hcpriv;
- req->transfer_buffer = virt_to_machine(urb->transfer_buffer);
+ req->transfer_buffer = virt_to_mfn(urb->transfer_buffer) << PAGE_SHIFT;
req->devnum = usb_pipedevice(urb->pipe);
req->direction = usb_pipein(urb->pipe);
req->speed = usb_pipeslow(urb->pipe);
@@ -280,7 +280,7 @@
printk(KERN_DEBUG
"queuing probe: req_prod = %d (@ 0x%lx), resp_prod = %d, "
"resp_cons = %d\n", usbif->req_prod,
- virt_to_machine(&usbif->req_prod),
+ virt_to_mfn(&usbif->req_prod),
usbif->resp_prod, xhci->usb_resp_cons);
#endif
@@ -1555,7 +1555,7 @@
cmsg.type = CMSG_USBIF_FE;
cmsg.subtype = CMSG_USBIF_FE_INTERFACE_CONNECT;
cmsg.length = sizeof(usbif_fe_interface_connect_t);
- up.shmem_frame = virt_to_machine(sring) >> PAGE_SHIFT;
+ up.shmem_frame = virt_to_mfn(sring);
memcpy(cmsg.msg, &up, sizeof(up));
/* Tell the controller to bring up the interface. */
@@ -1599,7 +1599,7 @@
DPRINTK(KERN_INFO __FILE__
": USB XHCI: SHM at %p (0x%lx), EVTCHN %d\n",
- xhci->usb_ring.sring, virt_to_machine(xhci->usbif),
+ xhci->usb_ring.sring, virt_to_mfn(xhci->usbif),
xhci->evtchn);
xhci->state = USBIF_STATE_CONNECTED;
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c Fri Aug 19
18:19:28 2005
@@ -48,13 +48,12 @@
static inline struct ringbuf_head *outbuf(void)
{
- return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT);
+ return mfn_to_virt(xen_start_info.store_mfn);
}
static inline struct ringbuf_head *inbuf(void)
{
- return machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT)
- + PAGE_SIZE/2;
+ return mfn_to_virt(xen_start_info.store_mfn) + PAGE_SIZE/2;
}
static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
@@ -219,8 +218,7 @@
}
/* FIXME zero out page -- domain builder should probably do this*/
- memset(machine_to_virt(xen_start_info.store_mfn << PAGE_SHIFT),
- 0, PAGE_SIZE);
+ memset(mfn_to_virt(xen_start_info.store_mfn), 0, PAGE_SIZE);
return 0;
}
diff -r 99914b54f7bf -r 81576d3d1ca8
linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Aug 18
18:40:02 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Fri Aug 19
18:19:28 2005
@@ -48,15 +48,7 @@
match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
{
for (; !streq(arr->devicetype, ""); arr++) {
- if (!streq(arr->devicetype, dev->devicetype))
- continue;
-
- /* If they don't care what subtype, it's a match. */
- if (streq(arr->subtype, ""))
- return arr;
-
- /* If they care, device must have (same) subtype. */
- if (dev->subtype && streq(arr->subtype, dev->subtype))
+ if (streq(arr->devicetype, dev->devicetype))
return arr;
}
return NULL;
@@ -72,10 +64,102 @@
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
+/* Describes one xenbus bus flavour (the frontend and backend instances below). */
+struct xen_bus_type
+{
+ /* Top-level store directory name; the instances use "device" and "backend". */
+ char *root;
+ /* NOTE(review): presumably the path depth of a device node below root -- confirm against the users. */
+ unsigned int levels;
+ /* Derives a bus_id from a store node name; the implementations return 0 or a negative errno. */
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+ struct device dev;
+};
+
+/*
+ * device/<type>/<id> => <type>-<id>
+ *
+ * Copies everything after the first '/' of nodename into bus_id and turns
+ * the first remaining '/' into '-'.  Returns 0 on success, or -EINVAL when
+ * nodename has no '/', the remainder does not fit in BUS_ID_SIZE, or the
+ * remainder contains no second '/'.
+ */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+	/* Keep nodename intact so the warning can name the offending node. */
+	const char *rest = strchr(nodename, '/');
+	char *sep;
+
+	if (!rest || strlen(rest + 1) >= BUS_ID_SIZE) {
+		printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+		return -EINVAL;
+	}
+
+	strlcpy(bus_id, rest + 1, BUS_ID_SIZE);
+	sep = strchr(bus_id, '/');
+	if (!sep) {
+		printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+		return -EINVAL;
+	}
+	*sep = '-';
+	return 0;
+}
+
/* Bus type for frontend drivers. */
-static struct bus_type xenbus_type = {
- .name = "xenbus",
- .match = xenbus_match,
+/* Forward declaration: the probe hook is referenced by the initialiser below. */
+static int xenbus_probe_frontend(const char *type, const char *name);
+/* Frontend bus instance; xenbus_register_device() attaches drivers to its .bus. */
+static struct xen_bus_type xenbus_frontend = {
+ .root = "device",
+ .levels = 2, /* device/type/<id> */
+ .get_bus_id = frontend_bus_id,
+ .probe = xenbus_probe_frontend,
+ .bus = {
+ .name = "xen",
+ .match = xenbus_match,
+ },
+ .dev = {
+ .bus_id = "xen",
+ },
+};
+
+/*
+ * backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id>
+ *
+ * Reads "frontend-id" and "frontend" from nodename and checks that the
+ * frontend path is non-empty and exists in the store.  Returns 0 on
+ * success; -EINVAL for a malformed nodename, -ERANGE for an empty
+ * frontend path, -ENOENT if the frontend does not exist, -ENOSPC if the
+ * result does not fit in BUS_ID_SIZE, or the error from xenbus_gather().
+ */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+	int domid, err;
+	const char *devid, *type, *frontend;
+	unsigned int typelen;
+
+	type = strchr(nodename, '/');
+	if (!type)
+		return -EINVAL;
+	type++;
+	typelen = strcspn(type, "/");
+	if (!typelen || type[typelen] != '/')
+		return -EINVAL;
+
+	devid = strrchr(nodename, '/') + 1;
+
+	err = xenbus_gather(nodename, "frontend-id", "%i", &domid,
+			    "frontend", NULL, &frontend,
+			    NULL);
+	if (err)
+		return err;
+	if (strlen(frontend) == 0)
+		err = -ERANGE;
+
+	if (!err && !xenbus_exists(frontend, ""))
+		err = -ENOENT;
+
+	/*
+	 * The frontend path is only needed for the checks above; release it
+	 * on every path, not just on error, so it is not leaked.
+	 */
+	kfree(frontend);
+	if (err)
+		return err;
+
+	/* "%.*s" takes an int precision. */
+	if (snprintf(bus_id, BUS_ID_SIZE,
+		     "%.*s-%i-%s", (int)typelen, type, domid, devid) >= BUS_ID_SIZE)
+		return -ENOSPC;
+	return 0;
+}
+
+/* Forward declaration: the probe hook is referenced by the initialiser below. */
+static int xenbus_probe_backend(const char *type, const char *uuid);
+/* Backend bus instance; xenbus_register_backend() attaches drivers to its .bus. */
+static struct xen_bus_type xenbus_backend = {
+ .root = "backend",
+ .levels = 3, /* backend/type/<frontend>/<id> */
+ .get_bus_id = backend_bus_id,
+ .probe = xenbus_probe_backend,
+ .bus = {
+ .name = "xen-backend",
+ .match = xenbus_match,
+ },
+ .dev = {
+ .bus_id = "xen-backend",
+ },
 };
static int xenbus_dev_probe(struct device *_dev)
@@ -104,12 +188,13 @@
return drv->remove(dev);
}
-int xenbus_register_driver(struct xenbus_driver *drv)
+static int xenbus_register_driver(struct xenbus_driver *drv,
+ struct xen_bus_type *bus)
{
int err;
drv->driver.name = drv->name;
- drv->driver.bus = &xenbus_type;
+ drv->driver.bus = &bus->bus;
drv->driver.owner = drv->owner;
drv->driver.probe = xenbus_dev_probe;
drv->driver.remove = xenbus_dev_remove;
@@ -120,6 +205,16 @@
return err;
}
+/* Register a frontend driver on the xenbus_frontend bus; returns xenbus_register_driver()'s result. */
+int xenbus_register_device(struct xenbus_driver *drv)
+{
+ return xenbus_register_driver(drv, &xenbus_frontend);
+}
+
+/* Register a backend driver on the xenbus_backend bus; returns xenbus_register_driver()'s result. */
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+ return xenbus_register_driver(drv, &xenbus_backend);
+}
+
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
down(&xenbus_lock);
@@ -130,52 +225,98 @@
struct xb_find_info
{
struct xenbus_device *dev;
- const char *busid;
+ const char *nodename;
};
static int cmp_dev(struct device *dev, void *data)
{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
struct xb_find_info *info = data;
- if (streq(dev->bus_id, info->busid)) {
- info->dev = container_of(get_device(dev),
- struct xenbus_device, dev);
+ if (streq(xendev->nodename, info->nodename)) {
+ info->dev = xendev;
+ get_device(dev);
return 1;
}
return 0;
}
-/* FIXME: device_find is fixed in 2.6.13-rc2 according to Greg KH --RR */
-struct xenbus_device *xenbus_device_find(const char *busid)
-{
- struct xb_find_info info = { .dev = NULL, .busid = busid };
-
- bus_for_each_dev(&xenbus_type, NULL, &info, cmp_dev);
+struct xenbus_device *xenbus_device_find(const char *nodename,
+ struct bus_type *bus)
+{
+ struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+ bus_for_each_dev(bus, NULL, &info, cmp_dev);
return info.dev;
}
+static int cleanup_dev(struct devic |