WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] x86: Fix lapic timer stop issue in deep C state

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] x86: Fix lapic timer stop issue in deep C state
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 21 May 2008 05:30:07 -0700
Delivery-date: Wed, 21 May 2008 05:30:07 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1211362737 -3600
# Node ID d795e15b85a788d0389d24963897cf480dcab0e9
# Parent  672c09aad49df0b8056c795bd6c351746d037975
x86: Fix lapic timer stop issue in deep C state

The local APIC timer may stop on deep C state (C3/C4...) entry/exit. This
patch adds logic that uses the platform timer (HPET) to re-enable the local
APIC timer at C state entry/exit.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
Signed-off-by: Yu Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir.fraser@xxxxxxxxxx>
---
 xen/arch/x86/Makefile        |    1 
 xen/arch/x86/acpi/cpu_idle.c |   11 -
 xen/arch/x86/hpet.c          |  291 +++++++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/time.c          |   59 ++------
 xen/common/timer.c           |    8 -
 xen/include/asm-x86/hpet.h   |   20 ++
 xen/include/xen/timer.h      |    9 +
 7 files changed, 348 insertions(+), 51 deletions(-)

diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/Makefile     Wed May 21 10:38:57 2008 +0100
@@ -50,6 +50,7 @@ obj-y += machine_kexec.o
 obj-y += machine_kexec.o
 obj-y += crash.o
 obj-y += tboot.o
+obj-y += hpet.o
 
 obj-$(crash_debug) += gdbstub.o
 
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c      Wed May 21 10:38:57 2008 +0100
@@ -39,6 +39,7 @@
 #include <xen/smp.h>
 #include <asm/cache.h>
 #include <asm/io.h>
+#include <asm/hpet.h>
 #include <xen/guest_access.h>
 #include <public/platform.h>
 #include <asm/processor.h>
@@ -438,19 +439,19 @@ static void acpi_processor_idle(void)
         t1 = inl(pmtmr_ioport);
 
         /*
-         * FIXME: Before invoking C3, be aware that TSC/APIC timer may be 
+         * Before invoking C3, be aware that TSC/APIC timer may be 
          * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
          * deep C state can't work correctly.
          */
         /* preparing TSC stop */
         cstate_save_tsc();
-        /* placeholder for preparing APIC stop */
-
+        /* preparing APIC stop */
+        hpet_broadcast_enter();
         /* Invoke C3 */
         acpi_idle_do_entry(cx);
 
-        /* placeholder for recovering APIC */
-
+        /* recovering APIC */
+        hpet_broadcast_exit();
         /* recovering TSC */
         cstate_restore_tsc();
 
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/hpet.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hpet.c       Wed May 21 10:38:57 2008 +0100
@@ -0,0 +1,291 @@
+/******************************************************************************
+ * arch/x86/hpet.c
+ * 
+ * HPET management.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/time.h>
+#include <xen/timer.h>
+#include <xen/smp.h>
+#include <xen/softirq.h>
+#include <asm/fixmap.h>
+#include <asm/div64.h>
+#include <asm/hpet.h>
+
+#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
+
+#define MAX_DELTA_NS MILLISECS(10*1000)
+#define MIN_DELTA_NS MICROSECS(1)
+
+struct hpet_event_channel
+{
+    unsigned long mult;       /* ns -> tick scale factor (see ns2ticks()) */
+    int           shift;      /* shift applied after scaling by mult */
+    s_time_t      next_event; /* deadline last programmed; STIME_MAX if none */
+    cpumask_t     cpumask;    /* CPUs between broadcast enter and exit */
+    spinlock_t    lock;       /* held while scanning/reprogramming */
+    void          (*event_handler)(struct hpet_event_channel *); /* tick hook */
+};
+static struct hpet_event_channel hpet_event; /* sole instance in this file */
+
+unsigned long hpet_address;
+
+/*
+ * Calculate a multiplication factor for scaled math, which is used to convert
+ * nanoseconds based values to clock ticks:
+ *
+ * clock_ticks = (nanoseconds * factor) >> shift.
+ *
+ * div_sc is the rearranged equation to calculate a factor from a given clock
+ * ticks / nanoseconds ratio:
+ *
+ * factor = (clock_ticks << shift) / nanoseconds
+ */
+static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
+                                   int shift)
+{
+    uint64_t tmp = ((uint64_t)ticks) << shift;
+    /* do_div() leaves the quotient in tmp; it is then narrowed on return. */
+    do_div(tmp, nsec);
+    return (unsigned long) tmp;
+}
+
+/*
+ * Convert nanoseconds based values to clock ticks:
+ *
+ * clock_ticks = (nanoseconds * factor) >> shift.
+ */
+static inline unsigned long ns2ticks(unsigned long nsec, int shift,
+                                     unsigned long factor)
+{
+    uint64_t tmp = ((uint64_t)nsec * factor) >> shift;
+    /* The product is formed in 64 bits before the shift, then narrowed. */
+    return (unsigned long) tmp;
+}
+
+static int hpet_legacy_next_event(unsigned long delta)
+{
+    /* Counter/comparator are accessed as 32 bits: use wrap-safe 32-bit math. */
+    u32 cnt = hpet_read32(HPET_COUNTER) + (u32)delta;
+
+    /* Arm comparator 0 to fire 'delta' ticks from now. */
+    hpet_write32(cnt, HPET_T0_CMP);
+    /* -ETIME if the counter already passed the comparator (IRQ missed). */
+    return ((int32_t)(hpet_read32(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
+}
+
+static int reprogram_hpet_evt_channel(
+    struct hpet_event_channel *ch,
+    s_time_t expire, s_time_t now, int force)
+{
+    int64_t delta;
+    int ret;
+    /* Program the HPET for 'expire': returns 0 once armed, else -ETIME. */
+    if ( unlikely(expire < 0) )
+    {
+        printk(KERN_DEBUG "reprogram: expire < 0\n");
+        return -ETIME;
+    }
+
+    delta = expire - now;
+    if ( delta <= 0 )
+    {
+        printk(KERN_DEBUG "reprogram: expire(%"PRIx64") < "
+               "now(%"PRIx64")\n", expire, now);
+        if ( !force )
+            return -ETIME;
+    }
+    /* Reached with delta <= 0 only when force is set. */
+    ch->next_event = expire;
+    /* Clamp to [MIN_DELTA_NS, MAX_DELTA_NS], then convert ns to HPET ticks. */
+    delta = min_t(int64_t, delta, MAX_DELTA_NS);
+    delta = max_t(int64_t, delta, MIN_DELTA_NS);
+    delta = ns2ticks(delta, ch->shift, ch->mult);
+    /* If forced, retry with a doubled delay until arming succeeds. */
+    ret = hpet_legacy_next_event(delta);
+    while ( ret && force )
+    {
+        delta += delta;
+        ret = hpet_legacy_next_event(delta);
+    }
+
+    return ret;
+}
+
+static int evt_do_broadcast(cpumask_t mask)
+{
+    int ret = 0, cpu = smp_processor_id();
+    /* Returns 1 if TIMER_SOFTIRQ was raised for any CPU in mask, else 0. */
+    if ( cpu_isset(cpu, mask) )
+    {
+        cpu_clear(cpu, mask);
+        raise_softirq(TIMER_SOFTIRQ);
+        ret = 1;
+    }
+    /* mask is a by-value copy; what is left excludes the calling CPU. */
+    if ( !cpus_empty(mask) )
+    {
+       cpumask_raise_softirq(mask, TIMER_SOFTIRQ);
+       ret = 1;
+    }
+    return ret;
+}
+
+static void handle_hpet_broadcast(struct hpet_event_channel *ch)
+{
+    cpumask_t mask;
+    s_time_t now, next_event;
+    int cpu, current_cpu = smp_processor_id();
+    /* HPET tick: wake CPUs whose deadline has passed, then re-arm. */
+    spin_lock(&ch->lock);
+    /* The CPU taking this tick is not expected to be in the mask. */
+    if ( cpu_isset(current_cpu, ch->cpumask) )
+        printk(KERN_DEBUG "WARNING: current cpu%d in bc_mask\n", current_cpu);
+again:
+    ch->next_event = STIME_MAX;
+    next_event = STIME_MAX;
+    mask = (cpumask_t)CPU_MASK_NONE;
+    now = NOW();
+
+    /* find all expired events */
+    for_each_cpu_mask(cpu, ch->cpumask)
+    {
+        if ( per_cpu(timer_deadline, cpu) <= now )
+            cpu_set(cpu, mask);
+        else if ( per_cpu(timer_deadline, cpu) < next_event )
+            next_event = per_cpu(timer_deadline, cpu);
+    }
+    if ( per_cpu(timer_deadline, current_cpu) <= now )
+        cpu_set(current_cpu, mask);
+
+    /* wakeup the cpus which have an expired event. */
+    evt_do_broadcast(mask);
+    /* Re-arm for the earliest pending deadline; rescan if arming fails. */
+    if ( next_event != STIME_MAX )
+    {
+        if ( reprogram_hpet_evt_channel(ch, next_event, now, 0) )
+            goto again;
+    }
+    spin_unlock(&ch->lock);
+}
+
+void hpet_broadcast_init(void)
+{
+    u64 hpet_rate;
+    u32 hpet_id, cfg;
+    /* On either early return below, hpet_event.event_handler stays NULL. */
+    hpet_rate = hpet_setup();
+    if ( hpet_rate == 0 )
+        return;
+    /* Without legacy interrupt support (HPET_ID_LEGSUP) broadcast is off. */
+    hpet_id = hpet_read32(HPET_ID);
+    if ( !(hpet_id & HPET_ID_LEGSUP) )
+        return;
+
+    /* Start HPET legacy interrupts */
+    cfg = hpet_read32(HPET_CFG);
+    cfg |= HPET_CFG_LEGACY;
+    hpet_write32(cfg, HPET_CFG);
+
+    /* set HPET T0 as oneshot */
+    cfg = hpet_read32(HPET_T0_CFG);
+    cfg &= ~HPET_TN_PERIODIC;
+    cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+    hpet_write32(cfg, HPET_T0_CFG);
+
+    /*
+     * hpet_rate is the counter frequency (ticks per second). Derive the
+     * scaled-math factor for nanosecond to hpet tick conversion from it.
+     */
+    hpet_event.mult = div_sc((unsigned long)hpet_rate, 1000000000ul, 32);
+    hpet_event.shift = 32;
+    hpet_event.next_event = STIME_MAX;
+    hpet_event.event_handler = handle_hpet_broadcast;
+    spin_lock_init(&hpet_event.lock);
+}
+
+void hpet_broadcast_enter(void)
+{
+    struct hpet_event_channel *ch = &hpet_event;
+
+    /* Broadcast never initialised (handler unset): do not touch the HPET. */
+    if ( !ch->event_handler )
+        return;
+    cpu_set(smp_processor_id(), ch->cpumask);
+    spin_lock(&ch->lock);
+    /* reprogram if current cpu expire time is nearer */
+    if ( this_cpu(timer_deadline) < ch->next_event )
+        reprogram_hpet_evt_channel(ch, this_cpu(timer_deadline), NOW(), 1);
+    spin_unlock(&ch->lock);
+}
+
+void hpet_broadcast_exit(void)
+{
+    struct hpet_event_channel *ch = &hpet_event;
+    int cpu = smp_processor_id();
+    /* Leave the broadcast set; if we were in it, reprogram our deadline. */
+    if ( cpu_test_and_clear(cpu, ch->cpumask) )
+        reprogram_timer(per_cpu(timer_deadline, cpu));
+}
+
+int hpet_legacy_irq_tick(void)
+{
+    if ( !hpet_event.event_handler )
+        return 0; /* no handler installed: tick is not consumed here */
+    hpet_event.event_handler(&hpet_event);
+    return 1; /* handler ran: tick is reported as consumed */
+}
+
+u64 hpet_setup(void)
+{
+    static u64 hpet_rate;
+    static int initialised;
+    u32 hpet_id, hpet_period, cfg;
+    int i;
+    /* Runs once: later calls return the cached rate (0 if setup failed). */
+    if ( initialised )
+        return hpet_rate;
+    initialised = 1;
+
+    if ( hpet_address == 0 )
+        return 0;
+
+    set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+
+    hpet_id = hpet_read32(HPET_ID);
+    if ( hpet_id == 0 )
+    {
+        printk("BAD HPET vendor id.\n");
+        return 0;
+    }
+
+    /* Check for sane period (100ps <= period <= 100ns). */
+    hpet_period = hpet_read32(HPET_PERIOD);
+    if ( (hpet_period > 100000000) || (hpet_period < 100000) )
+    {
+        printk("BAD HPET period %u.\n", hpet_period);
+        return 0;
+    }
+    /* Clear the global enable and legacy bits while reconfiguring. */
+    cfg = hpet_read32(HPET_CFG);
+    cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+    hpet_write32(cfg, HPET_CFG);
+
+    for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
+    {
+        /* Disable each timer through its own config register, not T0's. */
+        cfg = hpet_read32(HPET_T0_CFG + i*0x20);
+        hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG + i*0x20);
+    }
+
+    cfg = hpet_read32(HPET_CFG);
+    cfg |= HPET_CFG_ENABLE;
+    hpet_write32(cfg, HPET_CFG);
+    /* Rate = 10^15 / period; the quotient is left in hpet_rate by do_div(). */
+    hpet_rate = 1000000000000000ULL; /* 10^15 */
+    (void)do_div(hpet_rate, hpet_period);
+
+    return hpet_rate;
+}
diff -r 672c09aad49d -r d795e15b85a7 xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Tue May 20 14:50:45 2008 +0100
+++ b/xen/arch/x86/time.c       Wed May 21 10:38:57 2008 +0100
@@ -38,7 +38,6 @@ string_param("clocksource", opt_clocksou
 #define EPOCH MILLISECS(1000)
 
 unsigned long cpu_khz;  /* CPU clock frequency in kHz. */
-unsigned long hpet_address;
 DEFINE_SPINLOCK(rtc_lock);
 unsigned long pit0_ticks;
 static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
@@ -68,7 +67,8 @@ struct platform_timesource {
 
 static DEFINE_PER_CPU(struct cpu_time, cpu_time);
 
-static u8 tsc_invariant=0;  /* TSC is invariant upon C state entry */
+/* TSC is invariant on C state entry? */
+static bool_t tsc_invariant;
 
 /*
  * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
@@ -151,6 +151,9 @@ static void timer_interrupt(int irq, voi
 {
     ASSERT(local_irq_is_enabled());
 
+    if ( hpet_legacy_irq_tick() )
+        return;
+
     /* Only for start-of-day interruopt tests in io_apic.c. */
     (*(volatile unsigned long *)&pit0_ticks)++;
 
@@ -347,47 +350,10 @@ static u32 read_hpet_count(void)
 
 static int init_hpet(struct platform_timesource *pts)
 {
-    u64 hpet_rate;
-    u32 hpet_id, hpet_period, cfg;
-    int i;
-
-    if ( hpet_address == 0 )
+    u64 hpet_rate = hpet_setup();
+
+    if ( hpet_rate == 0 )
         return 0;
-
-    set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-
-    hpet_id = hpet_read32(HPET_ID);
-    if ( hpet_id == 0 )
-    {
-        printk("BAD HPET vendor id.\n");
-        return 0;
-    }
-
-    /* Check for sane period (100ps <= period <= 100ns). */
-    hpet_period = hpet_read32(HPET_PERIOD);
-    if ( (hpet_period > 100000000) || (hpet_period < 100000) )
-    {
-        printk("BAD HPET period %u.\n", hpet_period);
-        return 0;
-    }
-
-    cfg = hpet_read32(HPET_CFG);
-    cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
-    hpet_write32(cfg, HPET_CFG);
-
-    for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
-    {
-        cfg = hpet_read32(HPET_T0_CFG + i*0x20);
-        cfg &= ~HPET_TN_ENABLE;
-        hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
-    }
-
-    cfg = hpet_read32(HPET_CFG);
-    cfg |= HPET_CFG_ENABLE;
-    hpet_write32(cfg, HPET_CFG);
-
-    hpet_rate = 1000000000000000ULL; /* 10^15 */
-    (void)do_div(hpet_rate, hpet_period);
 
     pts->name = "HPET";
     pts->frequency = hpet_rate;
@@ -1041,7 +1007,14 @@ static int __init disable_pit_irq(void)
         outb_p(0x30, PIT_MODE);
         outb_p(0, PIT_CH0);
         outb_p(0, PIT_CH0);
-    }
+
+        /*
+         * If we do not rely on PIT CH0 then we can use HPET for one-shot
+         * timer emulation when entering deep C states.
+         */
+        hpet_broadcast_init();
+    }
+
     return 0;
 }
 __initcall(disable_pit_irq);
diff -r 672c09aad49d -r d795e15b85a7 xen/common/timer.c
--- a/xen/common/timer.c        Tue May 20 14:50:45 2008 +0100
+++ b/xen/common/timer.c        Wed May 21 10:38:57 2008 +0100
@@ -35,7 +35,7 @@ struct timers {
 
 static DEFINE_PER_CPU(struct timers, timers);
 
-extern int reprogram_timer(s_time_t timeout);
+DEFINE_PER_CPU(s_time_t, timer_deadline);
 
 /****************************************************************************
  * HEAP OPERATIONS.
@@ -323,8 +323,10 @@ static void timer_softirq_action(void)
         }
 
         ts->running = NULL;
-    }
-    while ( !reprogram_timer(GET_HEAP_SIZE(heap) ? heap[1]->expires : 0) );
+
+        this_cpu(timer_deadline) = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0;
+    }
+    while ( !reprogram_timer(this_cpu(timer_deadline)) );
 
     spin_unlock_irq(&ts->lock);
 }
diff -r 672c09aad49d -r d795e15b85a7 xen/include/asm-x86/hpet.h
--- a/xen/include/asm-x86/hpet.h        Tue May 20 14:50:45 2008 +0100
+++ b/xen/include/asm-x86/hpet.h        Wed May 21 10:38:57 2008 +0100
@@ -49,4 +49,24 @@
 #define hpet_write32(y,x) \
     (*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)) = (y))
 
+/*
+ * Detect and initialise HPET hardware: return counter update frequency.
+ * Return value is zero if HPET is unavailable.
+ */
+u64 hpet_setup(void);
+
+/*
+ * Callback from legacy timer (PIT channel 0) IRQ handler.
+ * Returns 1 if tick originated from HPET; else 0.
+ */
+int hpet_legacy_irq_tick(void);
+
+/*
+ * Temporarily use an HPET event counter for timer interrupt handling,
+ * rather than using the LAPIC timer. Used for Cx state entry.
+ */
+void hpet_broadcast_init(void);
+void hpet_broadcast_enter(void);
+void hpet_broadcast_exit(void);
+
 #endif /* __X86_HPET_H__ */
diff -r 672c09aad49d -r d795e15b85a7 xen/include/xen/timer.h
--- a/xen/include/xen/timer.h   Tue May 20 14:50:45 2008 +0100
+++ b/xen/include/xen/timer.h   Wed May 21 10:38:57 2008 +0100
@@ -99,6 +99,15 @@ extern void process_pending_timers(void)
  */
 extern void timer_init(void);
 
+/*
+ * Next timer deadline for each CPU.
+ * Modified only by the local CPU and never in interrupt context.
+ */
+DECLARE_PER_CPU(s_time_t, timer_deadline);
+
+/* Arch-defined function to reprogram timer hardware for new deadline. */
+extern int reprogram_timer(s_time_t timeout);
+
 #endif /* _TIMER_H_ */
 
 /*

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] x86: Fix lapic timer stop issue in deep C state, Xen patchbot-unstable <=